enhanced caching in kelondroRecords and added better synchronization and finalizer methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@301 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 3be98f194d
commit 3e8ee5a46d
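
For readers skimming the commit message only: the change replaces the full-node cache in kelondroRecords with a bounded, synchronized header cache whose entries are aged through kelondroMScoreCluster and evicted lowest-score-first in checkCacheSpace(). Below is a minimal, hypothetical Java sketch of that caching pattern, not code from this commit; it approximates the score-based aging with a plain LRU (an access-ordered LinkedHashMap), and every name in it is invented.

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical sketch, not YaCy code: the class name and the use of
// LinkedHashMap are illustrative stand-ins. The real patch uses a HashMap
// (XcacheHeaders) plus a kelondroMScoreCluster that scores entries by
// seconds-since-startup and evicts the minimum score; here that aging is
// approximated by plain LRU order. Raw types are used to stay close to the
// Java 1.4 target of this codebase.
public class BoundedHeaderCache {

    private final Map cache;

    public BoundedHeaderCache(final int maxEntries) {
        // accessOrder = true makes iteration order follow access recency,
        // so the eldest entry is the least recently touched one
        this.cache = new LinkedHashMap(16, 0.75f, true) {
            protected boolean removeEldestEntry(Map.Entry eldest) {
                // evict as soon as the configured capacity is exceeded,
                // analogous to checkCacheSpace() removing the minimum-score entry
                return size() > maxEntries;
            }
        };
    }

    // all access goes through one monitor, as in the patched getNode()/deleteNode()
    public synchronized Object get(Object key) { return cache.get(key); }

    public synchronized void put(Object key, Object value) { cache.put(key, value); }

    public synchronized void remove(Object key) { cache.remove(key); }
}

The actual patch additionally stores only the overhead bytes, the overhead handles and the key column of each node in the cache (see the updateNode() hunk below), which keeps the per-entry footprint much smaller than caching whole records.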

@@ -3,7 +3,7 @@ javacSource=1.4
 javacTarget=1.4
 
 # Release Configuration
-releaseVersion=0.382
+releaseVersion=0.383
 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@@ -62,6 +62,15 @@ public class kelondroMergeIterator implements Iterator {
         nextb();
     }
 
+    public void finalize() {
+        // call finalizer of embedded objects
+        a = null;
+        b = null;
+        na = null;
+        nb = null;
+        comp = null;
+    }
+
     private void nexta() {
         if (a.hasNext()) na = (String) a.next(); else na = null;
     }

@@ -125,10 +125,10 @@ public class kelondroRecords {
     private int TXTPROPW;    // size of a single TXTPROPS element
 
     // caching buffer
-    private HashMap cache;                    // the cache; holds Node objects
-    private int cachesize;                    // number of cache records
-    private long startup;                     // startup time; for cache aging
-    private kelondroMScoreCluster cacheScore; // controls cache aging
+    private HashMap XcacheHeaders;             // the cache; holds overhead values and key element
+    private int XcacheSize;                    // number of cache records
+    private long XcacheStartup;                // startup time; for cache aging
+    private kelondroMScoreCluster XcacheScore; // controls cache aging
 
     public kelondroRecords(File file, long buffersize /* bytes */,
@@ -147,16 +147,16 @@ public class kelondroRecords {
         //kelondroRA raf = new kelondroBufferedRA(new kelondroFileRA(this.filename), 5000000, 1000);
         //kelondroRA raf = new kelondroNIOFileRA(this.filename, false, 10000);
         init(raf, ohbytec, ohhandlec, columns, FHandles, txtProps, txtPropWidth);
-        this.cachesize = (int) (buffersize / ((long) (overhead + recordsize)));
-        if (cachesize <= 0) {
-            cachesize = 0;
-            this.cache = null;
-            this.cacheScore = null;
-        } else {
-            this.cache = new HashMap();
-            this.cacheScore = new kelondroMScoreCluster();
-        }
-        this.startup = System.currentTimeMillis();
+        this.XcacheSize = (int) (buffersize / ((long) (overhead + columns[0])));
+        if (XcacheSize <= 0) {
+            XcacheSize = 0;
+            this.XcacheHeaders = null;
+            this.XcacheScore = null;
+        } else {
+            this.XcacheHeaders = new HashMap();
+            this.XcacheScore = new kelondroMScoreCluster();
+        }
+        this.XcacheStartup = System.currentTimeMillis();
     }
 
     public kelondroRecords(kelondroRA ra, long buffersize /* bytes */,
@@ -164,16 +164,16 @@ public class kelondroRecords {
                            int[] columns, int FHandles, int txtProps, int txtPropWidth) throws IOException {
         this.filename = null;
         init(ra, ohbytec, ohhandlec, columns, FHandles, txtProps, txtPropWidth);
-        this.cachesize = (int) (buffersize / ((long) (overhead + recordsize)));
-        if (cachesize <= 0) {
-            cachesize = 0;
-            this.cache = null;
-            this.cacheScore = null;
-        } else {
-            this.cache = new HashMap();
-            this.cacheScore = new kelondroMScoreCluster();
-        }
-        this.startup = System.currentTimeMillis();
+        this.XcacheSize = (int) (buffersize / ((long) (overhead + columns[0])));
+        if (XcacheSize <= 0) {
+            XcacheSize = 0;
+            this.XcacheHeaders = null;
+            this.XcacheScore = null;
+        } else {
+            this.XcacheHeaders = new HashMap();
+            this.XcacheScore = new kelondroMScoreCluster();
+        }
+        this.XcacheStartup = System.currentTimeMillis();
     }
 
     private void init(kelondroRA ra, short ohbytec, short ohhandlec,
@@ -246,31 +246,31 @@ public class kelondroRecords {
         //kelondroRA raf = new kelondroBufferedRA(new kelondroFileRA(this.filename), 5000000, 1000);
         //kelondroRA raf = new kelondroNIOFileRA(this.filename, (file.length() < 4000000), 10000);
         init(raf);
-        this.cachesize = (int) (buffersize / ((long) (overhead + recordsize)));
-        if (cachesize <= 0) {
-            cachesize = 0;
-            this.cache = null;
-            this.cacheScore = null;
-        } else {
-            this.cache = new HashMap();
-            this.cacheScore = new kelondroMScoreCluster();
-        }
-        this.startup = System.currentTimeMillis();
+        this.XcacheSize = (int) (buffersize / ((long) (overhead + COLWIDTHS[0])));
+        if (XcacheSize <= 0) {
+            XcacheSize = 0;
+            this.XcacheHeaders = null;
+            this.XcacheScore = null;
+        } else {
+            this.XcacheHeaders = new HashMap();
+            this.XcacheScore = new kelondroMScoreCluster();
+        }
+        this.XcacheStartup = System.currentTimeMillis();
     }
 
     public kelondroRecords(kelondroRA ra, long buffersize) throws IOException{
         this.filename = null;
         init(ra);
-        this.cachesize = (int) (buffersize / ((long) (overhead + recordsize)));
-        if (cachesize <= 0) {
-            cachesize = 0;
-            this.cache = null;
-            this.cacheScore = null;
-        } else {
-            this.cache = new HashMap();
-            this.cacheScore = new kelondroMScoreCluster();
-        }
-        this.startup = System.currentTimeMillis();
+        this.XcacheSize = (int) (buffersize / ((long) (overhead + COLWIDTHS[0])));
+        if (XcacheSize <= 0) {
+            XcacheSize = 0;
+            this.XcacheHeaders = null;
+            this.XcacheScore = null;
+        } else {
+            this.XcacheHeaders = new HashMap();
+            this.XcacheScore = new kelondroMScoreCluster();
+        }
+        this.XcacheStartup = System.currentTimeMillis();
     }
 
     private void init(kelondroRA ra) throws IOException{
@@ -321,24 +321,28 @@ public class kelondroRecords {
     }
 
     protected Node getNode(Handle handle, Node parentNode, int referenceInParent) {
-        if (cachesize == 0) return new Node(handle, parentNode, referenceInParent);
-        Node n = (Node) cache.get(handle);
-        if (n == null) {
-            n = new Node(handle, parentNode, referenceInParent);
-            checkCacheSpace();
-            return n;
-        } else {
-            //System.out.println("read from cache " + n.toString());
-            cacheScore.setScore(handle, (int) ((System.currentTimeMillis() - startup) / 1000));
-            return n;
+        if (XcacheSize == 0) return new Node(handle, parentNode, referenceInParent);
+        synchronized (XcacheHeaders) {
+            Node n = (Node) XcacheHeaders.get(handle);
+            if (n == null) {
+                n = new Node(handle, parentNode, referenceInParent);
+                checkCacheSpace();
+                return n;
+            } else {
+                //System.out.println("read from cache " + n.toString());
+                XcacheScore.setScore(handle, (int) ((System.currentTimeMillis() - XcacheStartup) / 1000));
+                return n;
+            }
         }
     }
 
     protected void deleteNode(Handle handle) throws IOException {
-        if (cachesize != 0) {
-            if (cache.get(handle) != null) synchronized (cache) {
-                cacheScore.deleteScore(handle);
-                cache.remove(handle);
+        if (XcacheSize != 0) {
+            synchronized (XcacheHeaders) {
+                if (XcacheHeaders.get(handle) != null) {
+                    XcacheScore.deleteScore(handle);
+                    XcacheHeaders.remove(handle);
+                }
             }
         }
         dispose(handle);
@@ -346,23 +350,20 @@ public class kelondroRecords {
 
     private void checkCacheSpace() {
         // check for space in cache
-        if (cachesize == 0) return;
-        if (cache.size() >= cachesize) {
+        // should be only called within a synchronized(XcacheHeaders) environment
+        if (XcacheSize == 0) return;
+        while (XcacheHeaders.size() >= XcacheSize) {
             // delete one entry
             try {
-                Handle delkey = (Handle) cacheScore.getMinObject(); // error (see below) here
-                synchronized (cache) {
-                    cacheScore.deleteScore(delkey);
-                    cache.remove(delkey);
-                }
+                Handle delkey = (Handle) XcacheScore.getMinObject(); // error (see below) here
+                XcacheScore.deleteScore(delkey);
+                XcacheHeaders.remove(delkey);
             } catch (NoSuchElementException e) {
-                System.out.println("strange kelondroRecords error: " + e.getMessage() + "; cachesize=" + cachesize + ", cache.size()=" + cache.size() + ", cacheScore.size()=" + cacheScore.size());
+                System.out.println("strange kelondroRecords error: " + e.getMessage() + "; cachesize=" + XcacheSize + ", cache.size()=" + XcacheHeaders.size() + ", cacheScore.size()=" + XcacheScore.size());
                 // this is a strange error and could be caused by internal java problems
                 // we simply clear the cache
-                synchronized (cache) {
-                    this.cacheScore = new kelondroMScoreCluster();
-                    this.cache = new HashMap();
-                }
+                this.XcacheScore = new kelondroMScoreCluster();
+                this.XcacheHeaders = new HashMap();
             }
         }
     }
@@ -395,6 +396,7 @@ public class kelondroRecords {
         private Handle[] ohHandle= null; // the overhead handles, OHHANDLEC values
         private byte[][] values = null;  // an array of byte[] nodes is the value vector
         private Handle handle = new Handle(NUL); // index of the entry, by default NUL means undefined
+
         private Node(byte[][] v) {
             // this defines an entry, but it does not lead to writing these entry values to the file
             // storing this entry can be done using the 'save()' command
@@ -435,6 +437,14 @@ public class kelondroRecords {
             this.ohHandle = null;
             updateNode();
         }
+
+        public void finalize() {
+            ohBytes = null;
+            ohHandle = null;
+            values = null;
+            handle = null;
+        }
+
         protected Handle handle() {
             // if this entry has an index, return it
             if (this.handle.index == NUL) throw new kelondroException(filename, "the entry has no index assigned");
@@ -590,6 +600,7 @@ public class kelondroRecords {
                 return values;
             }
         }
+
         protected synchronized void save() throws IOException {
             // this is called when an entry was defined with values only and not by retrieving with an index
             // if this happens, nothing of the internal array values have been written to the file
@@ -641,16 +652,37 @@ public class kelondroRecords {
                 s = s + ":***LOAD ERROR***:" + e.getMessage();
             }
             return s;
         }
         private void updateNode() {
-            if (cachesize != 0) {
-                if (!(cache.containsKey(handle))) checkCacheSpace();
-                synchronized (cache) {
-                    //System.out.println("updateNode " + this.toString());
-                    cache.put(handle, this);
-                    cacheScore.setScore(handle, (int) ((System.currentTimeMillis() - startup) / 1000));
-                    //System.out.println("cache now: " + cache.toString());
-                }
+            if (this.handle == null) return;
+            if (this.values == null) return;
+            if (this.ohBytes == null) return;
+            if (this.ohHandle == null) return;
+            if (XcacheSize != 0) {
+                synchronized (XcacheHeaders) {
+                    // remember size to evaluate a cache size check need
+                    int sizeBefore = XcacheHeaders.size();
+                    // generate cache entry
+                    byte[][] cacheValue;
+                    if (values == null) {
+                        cacheValue = null;
+                    } else {
+                        cacheValue = new byte[values.length][];
+                        cacheValue[0] = values[0];
+                        for (int i = 1; i < values.length; i++) cacheValue[i] = null;
+                    }
+                    Node cacheNode = new Node(cacheValue);
+                    cacheNode.handle = this.handle;
+                    cacheNode.ohBytes = this.ohBytes;
+                    cacheNode.ohHandle = this.ohHandle;
+                    // store the cache entry
+                    XcacheHeaders.put(cacheNode.handle, cacheNode);
+                    XcacheScore.setScore(handle, (int) ((System.currentTimeMillis() - XcacheStartup) / 1000));
+                    // delete the cache entry
+                    cacheNode = null;
+                    // check cache size
+                    if (XcacheHeaders.size() > sizeBefore) checkCacheSpace();
+                }
             }
         }
     }

@@ -777,6 +777,11 @@ public class kelondroTree extends kelondroRecords implements Comparator {
             // now every parent node to the start node is on the stack
         }
 
+        public void finalize() {
+            nextNode = null;
+            nodeStack = null;
+        }
+
         public boolean hasNext() {
             return nextNode != null;
         }
@@ -924,6 +929,10 @@ public class kelondroTree extends kelondroRecords implements Comparator {
             this.nodeIterator = nodeIterator;
         }
 
+        public void finalize() {
+            nodeIterator = null;
+        }
+
         public boolean hasNext() {
             return (nodeIterator.hasNext());
         }

@@ -50,13 +50,16 @@ import java.util.Map;
 
 import de.anomic.kelondro.kelondroDyn;
 import de.anomic.kelondro.kelondroMap;
+import de.anomic.kelondro.kelondroException;
 import de.anomic.server.serverCodings;
 
 public class plasmaCrawlProfile {
 
     private kelondroMap profileTable;
+    private File profileTableFile;
 
     public plasmaCrawlProfile(File profileTableFile) throws IOException {
+        this.profileTableFile = profileTableFile;
         if (profileTableFile.exists()) {
             profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000));
         } else {
@@ -65,6 +68,20 @@ public class plasmaCrawlProfile {
         }
     }
 
+    private void resetDatabase() {
+        // deletes the profile database and creates a new one
+        if (profileTable != null) try {
+            profileTable.close();
+        } catch (IOException e) {}
+        if (!(profileTableFile.delete())) throw new RuntimeException("cannot delete crawl profile database");
+        try {
+            profileTableFile.getParentFile().mkdirs();
+            profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000, plasmaURL.urlCrawlProfileHandleLength, 2000));
+        } catch (IOException e){
+            e.printStackTrace();
+        }
+    }
+
     public void close() {
         try {
             profileTable.close();
@@ -116,11 +133,17 @@ public class plasmaCrawlProfile {
                            boolean storeHTCache, boolean storeTXCache,
                            boolean localIndexing, boolean remoteIndexing,
                            boolean xsstopw, boolean xdstopw, boolean xpstopw) throws IOException {
 
         entry ne = new entry(name, startURL, generalFilter, specificFilter,
                              generalDepth, specificDepth,
                              crawlingQ, storeHTCache, storeTXCache, localIndexing, remoteIndexing,
                              xsstopw, xdstopw, xpstopw);
-        profileTable.set(ne.handle(), ne.map());
+        try {
+            profileTable.set(ne.handle(), ne.map());
+        } catch (kelondroException e) {
+            resetDatabase();
+            profileTable.set(ne.handle(), ne.map());
+        }
         return ne;
     }

@@ -604,7 +604,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
         // do a local crawl
         plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
-        String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
+        String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
             return true;
@@ -648,7 +648,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
         // start a global crawl, if possible
         plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT);
-        String stats = "REMOTECRAWLTRIGGER[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
+        String stats = "REMOTECRAWLTRIGGER[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
             return true;
@@ -733,7 +733,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
         // we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
         plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE);
-        String stats = "REMOTETRIGGEREDCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
+        String stats = "REMOTETRIGGEREDCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
             return false;

@@ -118,6 +118,7 @@ public final class plasmaWordIndex {
     }
 
     public class iterateFiles implements Iterator {
+        // Iterator of hash-strings in WORDS path
 
         private ArrayList hierarchy; // contains TreeSet elements, earch TreeSet contains File Entries
         private Comparator comp;     // for string-compare

@@ -184,7 +184,7 @@ public final class plasmaWordIndexAssortment {
 
     private void resetDatabase() {
         // deletes the assortment database and creates a new one
-        try {
+        if (assortments != null) try {
             assortments.close();
         } catch (IOException e) {}
         if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database");
