Replaced the kelondroTree database for NURLs with the new kelondroFlexTable

The new database (urlNotice2.table) is only created if the old one (urlNotice1.db) has been deleted or does not exist; an existing old database continues to be used.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2387 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 7fbba41962
commit 689bbcf9cd

@ -94,7 +94,7 @@ public class kelondroCollectionIndex {
this.loadfactor = loadfactor;
// create index table
index = new kelondroFlexTable(path, filenameStub + ".index", indexOrder, buffersize, preloadTime, indexRow(keyLength), true);
index = new kelondroFlexTable(path, filenameStub + ".index.table", indexOrder, buffersize, preloadTime, indexRow(keyLength), true);
// save/check property file for this array
File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize());

@ -35,7 +35,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
public kelondroFlexTable(File path, String tablename, kelondroOrder objectOrder, long buffersize, long preloadTime, kelondroRow rowdef, boolean exitOnFail) throws IOException {
super(path, tablename, rowdef, exitOnFail);
File newpath = new File(path, tablename + ".table");
File newpath = new File(path, tablename);
File indexfile = new File(newpath, "col.000.index");
kelondroIndex ki = null;
String description = new String(this.col[0].getDescription());
@ -106,7 +106,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException {
kelondroTree index = new kelondroTree(indexfile, buffersize, preloadTime, 10,
new kelondroRow("byte[] key-" + rowdef.width(0) + ", int reference-4"),
new kelondroRow("byte[] key-" + rowdef.width(0) + ", int reference-4 {b256}"),
objectOrder, 2, 80, true);
Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node;

@ -49,7 +49,7 @@ public class kelondroFlexWidthArray implements kelondroArray {
}
// check if table directory exists
File tabledir = new File(path, tablename + ".table");
File tabledir = new File(path, tablename);
if (tabledir.exists()) {
if (!(tabledir.isDirectory())) throw new IOException("path " + tabledir.toString() + " must be a directory");
} else {

@ -55,6 +55,7 @@ import java.util.Iterator;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroStack;
import de.anomic.kelondro.kelondroTree;
@ -165,19 +166,31 @@ public class plasmaCrawlNURL extends indexURL {
}
private void openHashCache() {
File cacheFile = new File(cacheStacksPath, "urlNotice1.db");
if (cacheFile.exists()) try {
File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db");
File newCacheFile = new File(cacheStacksPath, "urlNotice2.table");
if (newCacheFile.exists()) try {
urlHashCache = new kelondroFlexTable(cacheStacksPath, "urlNotice2.table", kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef, true);
} catch (IOException e) {
e.printStackTrace();
oldCacheFile.delete();
urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} else if (oldCacheFile.exists()) try {
// open existing cache
kelondroTree tree = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
kelondroTree tree = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
tree.assignRowdef(rowdef);
urlHashCache = tree;
} catch (IOException e) {
cacheFile.delete();
urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
oldCacheFile.delete();
urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} else {
// create new cache
cacheFile.getParentFile().mkdirs();
urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
oldCacheFile.getParentFile().mkdirs();
try {
urlHashCache = new kelondroFlexTable(cacheStacksPath, "urlNotice2.table", kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef, true);
} catch (IOException e) {
e.printStackTrace();
urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
}
}
}
@ -506,9 +519,11 @@ public class plasmaCrawlNURL extends indexURL {
}
private void insertEntry(kelondroRow.Entry entry) throws IOException {
String urlstring = entry.getColString(2, null);
if (urlstring == null) throw new IOException ("url string is null");
this.hash = entry.getColString(0, null);
this.initiator = entry.getColString(1, null);
this.url = new URL(entry.getColString(2, null).trim());
this.url = new URL(urlstring);
this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim();
this.loaddate = new Date(86400000 * entry.getColLong(5));

@ -62,6 +62,7 @@ import de.anomic.http.httpc;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
@ -561,7 +562,7 @@ public final class plasmaCrawlStacker {
private final serverSemaphore readSync;
private final serverSemaphore writeSync;
private final LinkedList urlEntryHashCache;
private kelondroTree urlEntryCache;
private kelondroIndex urlEntryCache;
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) {
// init the read semaphore
@ -580,8 +581,9 @@ public final class plasmaCrawlStacker {
if (cacheFile.exists()) {
// open existing cache
try {
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
this.urlEntryCache.assignRowdef(plasmaCrawlNURL.rowdef);
kelondroTree tree = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
tree.assignRowdef(plasmaCrawlNURL.rowdef);
this.urlEntryCache = tree;
} catch (IOException e) {
cacheFile.delete();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true);

@ -59,8 +59,8 @@ public class plasmaURLPool {
public plasmaURLPool(File plasmaPath, int ramLURL, int ramNURL, int ramEURL, long preloadTime) {
loadedURL = new plasmaCrawlLURL(new File(plasmaPath, "urlHash.db"), ramLURL, preloadTime);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, 0);
errorURL = new plasmaCrawlEURL(new File(plasmaPath, "urlErr0.db"), ramEURL, 0);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1);
errorURL = new plasmaCrawlEURL(new File(plasmaPath, "urlErr0.db"), ramEURL, -1);
}
public String exists(String hash) {

Loading…
Cancel
Save