activated new kelondroEcoTable file structure.

This data structure replaces almost all files in the PLASMA directory
The collection.index and the LURL-db will also be created as Eco-DBs if they do not already exist.
Existing Flex-databases will be used as they are (no data is lost).
If you want to force the creation of an Eco-collection.index, simply delete the old index.
The Eco file system will only be used if there is enough memory.
The collection.index RAM limit is 200 MB; if less memory is available, a Flex-Table is created instead.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4340 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 739f35d389
commit 94f21d9403

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.562
releaseVersion=0.563
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -80,6 +80,33 @@
<p><img src="PerformanceGraph.png" name="graph" /></p>
<p><strong>EcoTable RAM Index:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Table</td>
<td>Key Chunk Size</td>
<td>Key Count</td>
<td>Key Used Memory</td>
<td>Value Chunk Size</td>
<td>Value Count</td>
<td>Value Used Memory</td>
</tr>
#{EcoList}#
<tr class="TableCellLight">
<td align="left" class="TableCellDark">#[tableIndexPath]#</td>
<td align="right">#[tableIndexChunkSize]#</td>
<td align="right">#[tableIndexCount]#</td>
<td align="right">#[tableIndexMem]#</td>
<td align="right">#[tableTailChunkSize]#</td>
<td align="right">#[tableTailCount]#</td>
<td align="right">#[tableTailMem]#</td>
</tr>
#{/EcoList}#
<tr class="TableCellDark">
<td colspan="9">Total Mem = #[EcoIndexTotalMem]# MB</td>
</tr>
</table>
<p><strong>FlexTable RAM Index:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">

@ -51,6 +51,7 @@ import java.util.Map;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCachedRecords;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
@ -63,7 +64,7 @@ public class PerformanceMemory_p {
private static final long KB = 1024;
private static final long MB = 1024 * KB;
private static Map defaultSettings = null;
private static Map<String, String> defaultSettings = null;
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
@ -115,7 +116,7 @@ public class PerformanceMemory_p {
prop.putNum("memoryUsedNow", (memoryTotalNow - memoryFreeNow) / MB);
// write table for FlexTable index sizes
Iterator i = kelondroFlexTable.filenames();
Iterator<String> i = kelondroFlexTable.filenames();
String filename;
Map<String, String> map;
int p, c = 0;
@ -134,6 +135,32 @@ public class PerformanceMemory_p {
prop.put("TableList", c);
prop.putNum("TableIndexTotalMem", totalmem / (1024 * 1024d));
// write table for EcoTable index sizes
i = kelondroEcoTable.filenames();
c = 0;
totalmem = 0;
while (i.hasNext()) {
filename = (String) i.next();
prop.put("EcoList_" + c + "_tableIndexPath", ((p = filename.indexOf("DATA")) < 0) ? filename : filename.substring(p));
map = kelondroEcoTable.memoryStats(filename);
mem = Long.parseLong((String) map.get("tableIndexMem"));
totalmem += mem;
prop.put("EcoList_" + c + "_tableIndexMem", serverMemory.bytesToString(mem));
prop.put("EcoList_" + c + "_tableIndexChunkSize", map.get("tableIndexChunkSize"));
prop.putNum("EcoList_" + c + "_tableIndexCount", (String)map.get("tableIndexCount"));
mem = Long.parseLong((String) map.get("tableTailMem"));
totalmem += mem;
prop.put("EcoList_" + c + "_tableTailMem", serverMemory.bytesToString(mem));
prop.put("EcoList_" + c + "_tableTailChunkSize", map.get("tableTailChunkSize"));
prop.putNum("EcoList_" + c + "_tableTailCount", (String)map.get("tableTailCount"));
c++;
}
prop.put("EcoList", c);
prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d));
// write node cache table
i = kelondroCachedRecords.filenames();
c = 0;

@ -23,6 +23,21 @@
<Xmx>#[Xmx]#</Xmx>
<EcoTable>
#{EcoList}#
<entry>
<tableIndexPath>#[tableIndexPath]#</tableIndexPath>
<tableIndexChunkSize>#[tableIndexChunkSize]#</tableIndexChunkSize>
<tableIndexCount>#[tableIndexCount]#</tableIndexCount>
<tableIndexMem>#[tableIndexMem]#</tableIndexMem>
<tableTailChunkSize>#[tableTailChunkSize]#</tableTailChunkSize>
<tableTailCount>#[tableTailCount]#</tableTailCount>
<tableTailMem>#[tableTailMem]#</tableTailMem>
</entry>
#{/EcoList}#
<EcoIndexTotalMem>#[EcoIndexTotalMem]#</EcoIndexTotalMem>
</EcoTable>
<FlexTable>
#{TableList}#
<entry>

@ -15,7 +15,6 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroIntBytesMap;
@ -23,6 +22,7 @@ import de.anomic.kelondro.kelondroProfile;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.kelondro.kelondroSQLTable;
import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.kelondro.kelondroSplittedTree;
import de.anomic.kelondro.kelondroTree;
import de.anomic.server.serverInstantThread;
@ -212,10 +212,10 @@ public class dbtest {
}
if (dbe.equals("kelondroFlexSplitTable")) {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexSplitTable(tablepath, new File(tablename).getName(), preload, testRow, true);
table = new kelondroSplitTable(tablepath, new File(tablename).getName(), preload, testRow, true);
}
if (dbe.equals("kelondroEcoTable")) {
table = new kelondroEcoTable(new File(tablename), testRow, 100);
table = new kelondroEcoTable(new File(tablename), testRow, true, 100);
}
if (dbe.equals("mysql")) {
table = new kelondroSQLTable("mysql", testRow);

@ -54,6 +54,8 @@ import de.anomic.yacy.yacyURL;
public class kelondroCollectionIndex {
private static final int serialNumber = 0;
private static final long minimumRAM4Eco = 200 * 1024 * 1024;
private static final int EcoFSBufferSize = 1000;
private kelondroIndex index;
private int keylength;
@ -74,8 +76,6 @@ public class kelondroCollectionIndex {
private static final int idx_col_lastread = 6; // a time stamp, update time in days since 1.1.2000
private static final int idx_col_lastwrote = 7; // a time stamp, update time in days since 1.1.2000
private static final boolean useEcoTable = false;
private static kelondroRow indexRow(int keylength, kelondroByteOrder payloadOrder) {
return new kelondroRow(
"byte[] key-" + keylength + "," +
@ -156,8 +156,8 @@ public class kelondroCollectionIndex {
serverLog.logFine("STARTUP", "STARTED INITIALIZATION OF NEW COLLECTION INDEX WITH " + initialSpace + " ENTRIES. THIS WILL TAKE SOME TIME");
// initialize (new generation) index table from file
if (useEcoTable) {
index = new kelondroEcoTable(f, indexRow(keyLength, indexOrder), 100);
if (serverMemory.request(minimumRAM4Eco, false)) {
index = new kelondroEcoTable(f, indexRow(keyLength, indexOrder), true, EcoFSBufferSize);
} else {
index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keyLength, indexOrder), initialSpace, true);
}
@ -255,7 +255,7 @@ public class kelondroCollectionIndex {
return theindex;
} else {
// open a ecotable
return new kelondroEcoTable(f, indexRow(keylength, indexOrder), 100);
return new kelondroEcoTable(f, indexRow(keylength, indexOrder), true, EcoFSBufferSize);
}
}

@ -61,6 +61,7 @@ import java.util.Iterator;
public class kelondroDyn {
private static final int counterlen = 8;
private static final int EcoFSBufferSize = 20;
protected int keylen;
private int reclen;
@ -94,7 +95,15 @@ public class kelondroDyn {
}
} else {
if (file.exists()) {
if (file.isDirectory()) {
fbi = new kelondroFlexTable(file.getParentFile(), file.getName(), 10000, rowdef, 0, resetOnFail);
} else {
fbi = new kelondroEcoTable(file, rowdef, false, EcoFSBufferSize);
}
} else {
fbi = new kelondroEcoTable(file, rowdef, false, EcoFSBufferSize);
}
}
this.index = (useObjectCache) ? (kelondroIndex) new kelondroCache(fbi) : fbi;
this.keylen = key;
@ -109,7 +118,11 @@ public class kelondroDyn {
if (usetree) {
file.delete();
} else {
if (file.isDirectory()) {
kelondroFlexTable.delete(file.getParentFile(), file.getName());
} else {
file.delete();
}
}
}

@ -56,7 +56,7 @@ public class kelondroEcoFS {
private int cacheindex, cachecount, buffercount; // number of entries in buffer
private byte[] cache, buffer, zero;
private static final int maxBuffer = 512;
private static final int maxBuffer = 4 * 1024; // stay below hard disc cache (is that necessary?)
public kelondroEcoFS(File tablefile, int recordsize) throws IOException {

@ -29,8 +29,10 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroRow.Entry;
@ -58,7 +60,7 @@ public class kelondroEcoTable implements kelondroIndex {
private kelondroRow rowdef, taildef;
private int buffersize;
public kelondroEcoTable(File tablefile, kelondroRow rowdef, int buffersize) throws IOException {
public kelondroEcoTable(File tablefile, kelondroRow rowdef, boolean useTailCache, int buffersize) {
this.rowdef = rowdef;
this.buffersize = buffersize;
assert rowdef.primaryKeyIndex == 0;
@ -82,18 +84,14 @@ public class kelondroEcoTable implements kelondroIndex {
try { fos.close(); } catch (IOException e) {}
}
// open an existing table file
try {
// open an existing table file
this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize);
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
}
// initialize index and copy table
int records = file.size();
long neededRAM4table = records * taildef.objectsize * 3 / 2;
table = (serverMemory.request(neededRAM4table, true)) ? new kelondroRowSet(taildef, records + 1) : null;
table = ((useTailCache) && (serverMemory.request(neededRAM4table, true))) ? new kelondroRowSet(taildef, records + 1) : null;
index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, records + 1);
// read all elements from the file into the copy table
@ -110,6 +108,14 @@ public class kelondroEcoTable implements kelondroIndex {
// write the tail into the table
if (table != null) table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true));
}
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
throw new kelondroException(e.getMessage());
} catch (IOException e) {
e.printStackTrace();
throw new kelondroException(e.getMessage());
}
// track this table
tableTracker.put(tablefile.toString(), this);
@ -120,6 +126,35 @@ public class kelondroEcoTable implements kelondroIndex {
return kelondroEcoFS.tableSize(tablefile, recordsize);
}
public static final Iterator<String> filenames() {
    // enumerates the keys of the table tracker; every table registers
    // itself there under the string form of its table file
    final Iterator<String> trackedNames = tableTracker.keySet().iterator();
    return trackedNames;
}
public static final Map<String, String> memoryStats(String filename) {
    // looks up the table that was registered in the tracker under the
    // given file name and returns the statistics map of that table
    // (chunk sizes, entry counts and memory estimates as strings)
    return tableTracker.get(filename).memoryStats();
}
private final Map<String, String> memoryStats() {
    // returns statistical data about this object as decimal strings:
    //   tableIndexChunkSize / tableIndexCount / tableIndexMem  - the key index
    //   tableTailChunkSize  / tableTailCount  / tableTailMem   - the tail copy table
    // all tail values are "0" when no tail copy table is held (table == null)
    HashMap<String, String> map = new HashMap<String, String>();

    final int indexChunkSize = index.row().objectsize;
    final int indexCount = index.size();
    map.put("tableIndexChunkSize", Integer.toString(indexChunkSize));
    map.put("tableIndexCount", Integer.toString(indexCount));
    // widen to long before multiplying: chunk size * count can exceed the int range,
    // and the result is emitted as a long so that large tables are not clamped
    map.put("tableIndexMem", Long.toString((long) (((long) indexChunkSize) * indexCount * kelondroRowCollection.growfactor)));

    if (table == null) {
        map.put("tableTailChunkSize", "0");
        map.put("tableTailCount", "0");
        map.put("tableTailMem", "0");
    } else {
        final int tailChunkSize = table.row().objectsize;
        final int tailCount = table.size();
        map.put("tableTailChunkSize", Integer.toString(tailChunkSize));
        map.put("tableTailCount", Integer.toString(tailCount));
        // same widening as for the index memory estimate above
        map.put("tableTailMem", Long.toString((long) (((long) tailChunkSize) * tailCount * kelondroRowCollection.growfactor)));
    }
    return map;
}
public static int staticRAMIndexNeed(File f, kelondroRow rowdef) {
    // estimates the RAM needed for the key index of the table file f:
    // (key length + 4 bytes) per record, times the number of records in the file,
    // times the row set grow factor
    // the first factor is widened to long so that the product cannot wrap around
    // in int arithmetic (in case tableSize yields an int); a wrapped, negative
    // estimate would make callers that test for "ram > 0" ignore the table
    final long bytesPerIndexEntry = rowdef.primaryKeyLength + 4;
    return (int) (bytesPerIndexEntry * tableSize(f, rowdef.objectsize) * kelondroRowSet.growfactor);
}
public synchronized void addUnique(Entry row) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
@ -396,7 +431,7 @@ public class kelondroEcoTable implements kelondroIndex {
public static kelondroIndex testTable(File f, String testentities) throws IOException {
if (f.exists()) f.delete();
kelondroRow rowdef = new kelondroRow("byte[] a-4, byte[] b-4", kelondroNaturalOrder.naturalOrder, 0);
kelondroIndex tt = new kelondroEcoTable(f, rowdef, 100);
kelondroIndex tt = new kelondroEcoTable(f, rowdef, true, 100);
byte[] b;
kelondroRow.Entry row = rowdef.newEntry();
for (int i = 0; i < testentities.length(); i++) {

@ -1,4 +1,4 @@
// kelondroFlexSplitTable.java
// kelondroSplitTable.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 12.10.2006 on http://www.anomic.de
//
@ -36,10 +36,16 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class kelondroFlexSplitTable implements kelondroIndex {
import de.anomic.server.serverMemory;
// this is a set of kelondroFlex tables
// the set is divided into FlexTables with different entry date
public class kelondroSplitTable implements kelondroIndex {
// this is a set of kelondro tables
// the set is divided into tables with different entry date
// the table type can be either kelondroFlex or kelondroEco
private static final long minimumRAM4Eco = 80 * 1024 * 1024;
private static final int EcoFSBufferSize = 20;
private HashMap<String, kelondroIndex> tables; // a map from a date string to a kelondroIndex object
private kelondroRow rowdef;
@ -47,7 +53,7 @@ public class kelondroFlexSplitTable implements kelondroIndex {
private String tablename;
private kelondroOrder<kelondroRow.Entry> entryOrder;
public kelondroFlexSplitTable(File path, String tablename, long preloadTime, kelondroRow rowdef, boolean resetOnFail) {
public kelondroSplitTable(File path, String tablename, long preloadTime, kelondroRow rowdef, boolean resetOnFail) {
this.path = path;
this.tablename = tablename;
this.rowdef = rowdef;
@ -60,19 +66,25 @@ public class kelondroFlexSplitTable implements kelondroIndex {
// initialized tables map
this.tables = new HashMap<String, kelondroIndex>();
if (!(path.exists())) path.mkdirs();
String[] dir = path.list();
String[] tablefile = path.list();
String date;
// first pass: find tables
HashMap<String, Long> t = new HashMap<String, Long>();
long ram, sum = 0;
for (int i = 0; i < dir.length; i++) {
if ((dir[i].startsWith(tablename)) &&
(dir[i].charAt(tablename.length()) == '.') &&
(dir[i].length() == tablename.length() + 7)) {
ram = kelondroFlexTable.staticRAMIndexNeed(path, dir[i], rowdef);
File f;
for (int i = 0; i < tablefile.length; i++) {
if ((tablefile[i].startsWith(tablename)) &&
(tablefile[i].charAt(tablename.length()) == '.') &&
(tablefile[i].length() == tablename.length() + 7)) {
f = new File(path, tablefile[i]);
if (f.isDirectory()) {
ram = kelondroFlexTable.staticRAMIndexNeed(path, tablefile[i], rowdef);
} else {
ram = kelondroEcoTable.staticRAMIndexNeed(f, rowdef);
}
if (ram > 0) {
t.put(dir[i], new Long(ram));
t.put(tablefile[i], new Long(ram));
sum += ram;
}
}
@ -101,7 +113,13 @@ public class kelondroFlexSplitTable implements kelondroIndex {
// open next biggest table
t.remove(maxf);
date = maxf.substring(tablename.length() + 1);
f = new File(path, maxf);
if (f.isDirectory()) {
// this is a kelonodroFlex table
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail));
} else {
table = new kelondroEcoTable(f, rowdef, false, EcoFSBufferSize);
}
tables.put(date, table);
}
}
@ -111,7 +129,8 @@ public class kelondroFlexSplitTable implements kelondroIndex {
String[] l = path.list();
for (int i = 0; i < l.length; i++) {
if (l[i].startsWith(tablename)) {
kelondroFlexTable.delete(path, l[i]);
File f = new File(path, l[i]);
if (f.isDirectory()) kelondroFlexTable.delete(path, l[i]); else f.delete();
}
}
init(-1, true);
@ -202,7 +221,13 @@ public class kelondroFlexSplitTable implements kelondroIndex {
kelondroIndex table = (kelondroIndex) tables.get(suffix);
if (table == null) {
// make new table
if (serverMemory.request(minimumRAM4Eco, true)) {
// enough memory for a ecoTable
table = new kelondroEcoTable(new File(path, tablename + "." + suffix), rowdef, false, EcoFSBufferSize);
} else {
// use the flex table
table = new kelondroFlexTable(path, tablename + "." + suffix, -1, rowdef, 0, true);
}
tables.put(suffix, table);
}
table.put(row);
@ -233,7 +258,13 @@ public class kelondroFlexSplitTable implements kelondroIndex {
kelondroIndex table = (kelondroIndex) tables.get(suffix);
if (table == null) {
// make new table
if (serverMemory.request(minimumRAM4Eco, true)) {
// enough memory for a ecoTable
table = new kelondroEcoTable(new File(path, tablename + "." + suffix), rowdef, false, EcoFSBufferSize);
} else {
// use the flex table
table = new kelondroFlexTable(path, tablename + "." + suffix, -1, rowdef, 0, true);
}
tables.put(suffix, table);
}
table.addUnique(row);

@ -75,8 +75,8 @@ public class plasmaCrawlQueues {
log.logConfig("Starting Crawling Management");
noticeURL = new plasmaCrawlNURL(plasmaPath);
//errorURL = new plasmaCrawlZURL(); // fresh error DB each startup; can be hold in RAM and reduces IO;
errorURL = new plasmaCrawlZURL(plasmaPath, "urlError1.db", true);
delegatedURL = new plasmaCrawlZURL(plasmaPath, "urlDelegated1.db", false);
errorURL = new plasmaCrawlZURL(plasmaPath, "urlError2.db", true);
delegatedURL = new plasmaCrawlZURL(plasmaPath, "urlDelegated2.db", false);
}

@ -53,8 +53,7 @@ import java.util.TreeMap;
import de.anomic.kelondro.kelondroAbstractRecords;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack;
@ -63,8 +62,9 @@ import de.anomic.yacy.yacySeedDB;
public class plasmaCrawlBalancer {
private static final String stackSuffix = "8.stack";
private static final String indexSuffix = "8.db";
private static final String stackSuffix = "9.stack";
private static final String indexSuffix = "9.db";
private static final int EcoFSBufferSize = 200;
// a shared domainAccess map for all balancers
private static final Map<String, domaccess> domainAccess = Collections.synchronizedMap(new HashMap<String, domaccess>());
@ -140,16 +140,14 @@ public class plasmaCrawlBalancer {
private void openFileIndex() {
cacheStacksPath.mkdirs();
urlFileIndex = new kelondroCache(new kelondroFlexTable(cacheStacksPath, stackname + indexSuffix, -1, plasmaCrawlEntry.rowdef, 0, true));
urlFileIndex = new kelondroEcoTable(new File(cacheStacksPath, stackname + indexSuffix), plasmaCrawlEntry.rowdef, true, EcoFSBufferSize);
}
private void resetFileIndex() {
if (urlFileIndex != null) {
urlFileIndex.close();
urlFileIndex = null;
kelondroFlexTable.delete(cacheStacksPath, stackname + indexSuffix);
//File cacheFile = new File(cacheStacksPath, stackname + indexSuffix);
//cacheFile.delete();
new File(cacheStacksPath, stackname + indexSuffix).delete();
}
openFileIndex();
}

@ -71,7 +71,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -99,7 +99,7 @@ public final class plasmaCrawlLURL {
public plasmaCrawlLURL(File indexPath, long preloadTime) {
super();
urlIndexFile = new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", preloadTime, indexURLEntry.rowdef, false);
urlIndexFile = new kelondroSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", preloadTime, indexURLEntry.rowdef, false);
// init result stacks
externResultStack = new LinkedList<String>();
@ -147,7 +147,7 @@ public final class plasmaCrawlLURL {
}
public synchronized int writeCacheSize() {
if (urlIndexFile instanceof kelondroFlexSplitTable) return ((kelondroFlexSplitTable) urlIndexFile).writeBufferSize();
if (urlIndexFile instanceof kelondroSplitTable) return ((kelondroSplitTable) urlIndexFile).writeBufferSize();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).writeBufferSize();
return 0;
}

@ -57,9 +57,8 @@ import java.util.LinkedList;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroFlexWidthArray;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -72,6 +71,9 @@ import de.anomic.yacy.yacyURL;
public final class plasmaCrawlStacker extends Thread {
private static final int EcoFSBufferSize = 20;
private static String stackfile = "urlNoticeStacker9.db";
// keys for different database types
public static final int QUEUE_DB_TYPE_RAM = 0;
public static final int QUEUE_DB_TYPE_TREE = 1;
@ -281,10 +283,11 @@ public final class plasmaCrawlStacker extends Thread {
// do nothing..
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
kelondroFlexWidthArray.delete(cacheStacksPath, "urlNoticeStacker8.db");
new File(cacheStacksPath, stackfile).delete();
//kelondroFlexWidthArray.delete(cacheStacksPath, stackfile);
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlNoticeStacker8.db");
File cacheFile = new File(cacheStacksPath, stackfile);
cacheFile.delete();
}
}
@ -296,16 +299,19 @@ public final class plasmaCrawlStacker extends Thread {
this.urlEntryCache = new kelondroRowSet(plasmaCrawlEntry.rowdef, 0);
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
String newCacheName = "urlNoticeStacker8.db";
cacheStacksPath.mkdirs();
File f = new File(cacheStacksPath, stackfile);
try {
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
this.urlEntryCache = new kelondroEcoTable(f, plasmaCrawlEntry.rowdef, true, EcoFSBufferSize);
//this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
} catch (Exception e) {
e.printStackTrace();
// kill DB and try again
kelondroFlexTable.delete(cacheStacksPath, newCacheName);
f.delete();
//kelondroFlexTable.delete(cacheStacksPath, newCacheName);
try {
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
this.urlEntryCache = new kelondroEcoTable(f, plasmaCrawlEntry.rowdef, true, EcoFSBufferSize);
//this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
} catch (Exception ee) {
ee.printStackTrace();
System.exit(-1);
@ -313,7 +319,7 @@ public final class plasmaCrawlStacker extends Thread {
}
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlNoticeStacker8.db");
File cacheFile = new File(cacheStacksPath, stackfile);
cacheFile.getParentFile().mkdirs();
this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, true, preloadTime, plasmaCrawlEntry.rowdef));
}

@ -33,6 +33,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
@ -43,6 +44,8 @@ import de.anomic.yacy.yacyURL;
public class plasmaCrawlZURL {
private static final int EcoFSBufferSize = 200;
public final static kelondroRow rowdef = new kelondroRow(
"String urlhash-" + yacySeedDB.commonHashLength + ", " + // the url's hash
"String executor-" + yacySeedDB.commonHashLength + ", " + // the crawling executor
@ -55,13 +58,19 @@ public class plasmaCrawlZURL {
// the class object
private kelondroIndex urlIndex = null;
private LinkedList stack = new LinkedList(); // strings: url
private LinkedList<String> stack = new LinkedList<String>(); // strings: url
public plasmaCrawlZURL(File cachePath, String tablename, boolean startWithEmptyFile) {
// creates a new ZURL in a file
cachePath.mkdirs();
if (startWithEmptyFile) kelondroFlexTable.delete(cachePath, tablename);
urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
File f = new File(cachePath, tablename);
if (startWithEmptyFile) {
if (f.exists()) {
if (f.isDirectory()) kelondroFlexTable.delete(cachePath, tablename); else f.delete();
}
}
urlIndex = new kelondroEcoTable(f, rowdef, true, EcoFSBufferSize);
//urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
}
public plasmaCrawlZURL() {
@ -123,10 +132,6 @@ public class plasmaCrawlZURL {
}
}
public boolean getUseNewDB() {
return (urlIndex instanceof kelondroFlexTable);
}
public boolean exists(String urlHash) {
try {
return urlIndex.has(urlHash.getBytes());
@ -235,9 +240,9 @@ public class plasmaCrawlZURL {
}
public class kiter implements Iterator {
public class kiter implements Iterator<Entry> {
// enumerates entry elements
Iterator i;
Iterator<kelondroRow.Entry> i;
boolean error = false;
public kiter(boolean up, String firstHash) throws IOException {
@ -250,7 +255,7 @@ public class plasmaCrawlZURL {
return i.hasNext();
}
public Object next() throws RuntimeException {
public Entry next() throws RuntimeException {
kelondroRow.Entry e = (kelondroRow.Entry) i.next();
if (e == null) return null;
try {
@ -266,7 +271,7 @@ public class plasmaCrawlZURL {
}
public Iterator entries(boolean up, String firstHash) throws IOException {
public Iterator<Entry> entries(boolean up, String firstHash) throws IOException {
// enumerates entry elements
return new kiter(up, firstHash);
}

Loading…
Cancel
Save