added options to switch on or off the kelondroFlexTable for NURL, EURL and PreNURL

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2456 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent c26da4893b
commit b7f4a1521b

@ -60,12 +60,12 @@ Nightly builds from compiles out of SVN can be obtained from <a href="http://lat
<li>Generic release of YaCy (all platforms with J2SE 1.4.2: Linux, Mac OS X, Windows, Solaris):</li>
<ul>
<li><tt>from yacy.net&nbsp;&nbsp;&nbsp;: <a href="http://www.yacy.net/yacy/release/yacy_v0.46_20060823_2442.tar.gz"><tt>yacy_v0.46_20060823_2442.tar.gz</tt></a></tt></li>
<li><tt>from BerliOS.de&nbsp;: <a href="http://download.berlios.de/yacy/yacy_v0.45_20060501_2046.tar.gz"><tt>yacy_v0.45_20060501_2046.tar.gz</tt></a></tt></li><br><br>
<li><tt>from BerliOS.de&nbsp;: <a href="http://download.berlios.de/yacy/yacy_v0.46_20060823_2442.tar.gz"><tt>yacy_v0.46_20060823_2442.tar.gz</tt></a></tt></li><br><br>
</ul>
<li>Windows-flavour release of YaCy (same code as generic release, but with convenient Windows-Installer):</li>
<ul>
<li><tt>from yacy.net&nbsp;&nbsp;&nbsp;: <a href="http://www.yacy.net/yacy/release/yacy_v0.45_20060501_2049.exe"><tt>yacy_v0.45_20060501_2049.exe</tt></a></tt></li>
<li><tt>from BerliOS.de&nbsp;: <a href="http://download.berlios.de/yacy/yacy_v0.45_20060501_2049.exe"><tt>yacy_v0.45_20060501_2049.exe</tt></a></tt></li>
<li><tt>from yacy.net&nbsp;&nbsp;&nbsp;: <a href="http://www.yacy.net/yacy/release/yacy_v0.46_20060823_2442.exe"><tt>yacy_v0.46_20060823_2442.exe</tt></a></tt></li>
<li><tt>from BerliOS.de&nbsp;: <a href="http://download.berlios.de/yacy/yacy_v0.46_20060823_2442.exe"><tt>yacy_v0.46_20060823_2442.exe</tt></a></tt></li>
</ul>
</ul>
</p>

@ -184,7 +184,7 @@ public class dbtest {
}
if (dbe.equals("kelondroFlexTable")) {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), kelondroBase64Order.enhancedCoder, buffer, preload, testRow);
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, preload, testRow, kelondroBase64Order.enhancedCoder);
}
if (dbe.equals("mysql")) {
table = new dbTable("mysql", testRow);

@ -91,7 +91,7 @@ public class kelondroCollectionIndex {
this.loadfactor = loadfactor;
// create index table
index = new kelondroFlexTable(path, filenameStub + ".index.table", indexOrder, buffersize, preloadTime, indexRow(keyLength));
index = new kelondroFlexTable(path, filenameStub + ".index.table", buffersize, preloadTime, indexRow(keyLength), indexOrder);
// save/check property file for this array
File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize());
@ -455,7 +455,7 @@ public class kelondroCollectionIndex {
collectionIndex.close();
// printout of index
kelondroFlexTable index = new kelondroFlexTable(path, filenameStub + ".index", kelondroNaturalOrder.naturalOrder, buffersize, preloadTime, indexRow(9));
kelondroFlexTable index = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(9), kelondroNaturalOrder.naturalOrder);
index.print();
index.close();
} catch (IOException e) {

@ -69,6 +69,22 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro
}
}
/**
 * Opens the fixed-width array backed by the given file, retrying once.
 *
 * If the first construction attempt fails with an IOException, the file is
 * deleted and construction is attempted a second time. If that also fails,
 * both stack traces are printed and the JVM is terminated with status -1.
 *
 * @param file     backing file of the array
 * @param rowdef   row definition passed through to the constructor
 * @param intprops passed through to the constructor
 * @return the opened array
 */
public static kelondroFixedWidthArray open(File file, kelondroRow rowdef, int intprops) {
    IOException firstFailure;
    try {
        return new kelondroFixedWidthArray(file, rowdef, intprops);
    } catch (IOException e) {
        firstFailure = e;
    }

    // first attempt failed: throw the file away and start over
    file.delete();
    try {
        return new kelondroFixedWidthArray(file, rowdef, intprops);
    } catch (IOException secondFailure) {
        firstFailure.printStackTrace();
        secondFailure.printStackTrace();
        System.exit(-1);
        return null; // only present to satisfy the compiler
    }
}
public synchronized kelondroRow.Entry set(int index, kelondroRow.Entry rowentry) throws IOException {
// make room for element

@ -32,8 +32,8 @@ import java.util.Iterator;
public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex {
protected kelondroBytesIntMap index;
public kelondroFlexTable(File path, String tablename, kelondroOrder objectOrder, long buffersize, long preloadTime, kelondroRow rowdef) throws IOException {
public kelondroFlexTable(File path, String tablename, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder) throws IOException {
super(path, tablename, rowdef);
File newpath = new File(path, tablename);
File indexfile = new File(newpath, "col.000.index");
@ -129,35 +129,30 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
/**
 * Looks up the row stored under the given key.
 *
 * @param key primary key of the wanted row
 * @return the row at the indexed position, or null if the key is not in the index
 * @throws IOException if reading the row fails
 */
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
    synchronized (index) {
        final int pos = index.geti(key);
        // pos is not bounded by this.size(): deleted entries are subtracted
        // from the 'real' table size, so a valid row position may exceed it
        return (pos < 0) ? null : super.get(pos);
    }
}
/**
 * Stores a row, keyed by its first column.
 *
 * If the key is already indexed, the row at that position is overwritten and
 * the result of the underlying set() is returned. Otherwise the row is
 * appended, its position is recorded in the index, and null is returned.
 *
 * @param row the row to store
 * @return the result of the underlying set() on overwrite, null on insert
 * @throws IOException if the underlying array access fails
 */
public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
    synchronized (index) {
        final int pos = index.geti(row.getColBytes(0));
        if (pos >= 0) {
            // known key: replace the row in place
            return super.set(pos, row);
        }
        // unknown key: append the row and remember where it landed
        final byte[] key = row.getColBytes(0);
        final int appendedAt = super.add(row);
        index.puti(key, appendedAt);
        return null;
    }
}
/**
 * Removes the row stored under the given key.
 *
 * The key is taken out of the index first; the row is then read from the
 * array and removed there.
 *
 * @param key primary key of the row to remove
 * @return the removed row, or null if the key was not in the index
 * @throws IOException if reading or removing the row fails
 */
public synchronized kelondroRow.Entry remove(byte[] key) throws IOException {
    synchronized (index) {
        int i = index.removei(key);
        if (i < 0) return null;
        // single declaration of r; declaring it twice in this scope does not compile
        kelondroRow.Entry r = super.get(i);
        super.remove(i);
        return r;
    }
}
public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {

@ -104,8 +104,41 @@ public class kelondroFlexWidthArray implements kelondroArray {
}
}
/**
 * Deletes the table named tablename below path.
 *
 * A plain file at that location is deleted directly. For a directory, every
 * entry returned by list() is deleted (non-recursively) and then the
 * directory itself. If nothing exists at that location, the call is a no-op.
 *
 * @param path      parent directory of the table
 * @param tablename name of the table directory (or file) inside path
 */
public static void delete(File path, String tablename) {
    File tabledir = new File(path, tablename);
    if (!tabledir.exists()) {
        // nothing to delete; File.list() would return null here
        return;
    }
    if (!tabledir.isDirectory()) {
        tabledir.delete();
        return;
    }
    String[] files = tabledir.list();
    // list() may also return null on an I/O error; still try to remove the directory
    if (files != null) {
        for (int i = 0; i < files.length; i++) {
            new File(tabledir, files[i]).delete();
        }
    }
    tabledir.delete();
}
/**
 * Opens the flex-width array tablename below path, retrying once.
 *
 * If the first construction attempt fails with an IOException, the table is
 * removed via kelondroFlexWidthArray.delete and construction is attempted a
 * second time. If that also fails, both stack traces are printed and the JVM
 * is terminated with status -1.
 *
 * @param path      parent directory of the table
 * @param tablename name of the table inside path
 * @param rowdef    row definition passed through to the constructor
 * @return the opened array
 */
public static kelondroFlexWidthArray open(File path, String tablename, kelondroRow rowdef) {
    IOException firstFailure;
    try {
        return new kelondroFlexWidthArray(path, tablename, rowdef);
    } catch (IOException e) {
        firstFailure = e;
    }

    // first attempt failed: wipe the table and start over
    kelondroFlexWidthArray.delete(path, tablename);
    try {
        return new kelondroFlexWidthArray(path, tablename, rowdef);
    } catch (IOException secondFailure) {
        firstFailure.printStackTrace();
        secondFailure.printStackTrace();
        System.exit(-1);
        return null; // only present to satisfy the compiler
    }
}
/**
 * Closes every non-null column file of this array.
 *
 * The loop runs while holding the lock on col, and each column is closed
 * exactly once.
 *
 * @throws IOException if closing a column fails
 */
public void close() throws IOException {
    synchronized (col) {
        for (int i = 0; i < col.length; i++) if (col[i] != null) col[i].close();
    }
}
protected static final String colfilename(int start, int end) {
@ -222,9 +255,8 @@ public class kelondroFlexWidthArray implements kelondroArray {
String testname = "flextest";
try {
System.out.println("erster Test");
new File(f, testname).delete();
kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", rowdef);
kelondroFlexWidthArray.delete(f, testname);
kelondroFlexWidthArray k = kelondroFlexWidthArray.open(f, "flextest", rowdef);
k.add(k.row().newEntry(new byte[][]{"a".getBytes(), "xxxx".getBytes()}));
k.add(k.row().newEntry(new byte[][]{"b".getBytes(), "xxxx".getBytes()}));
k.remove(0);
@ -243,16 +275,22 @@ public class kelondroFlexWidthArray implements kelondroArray {
System.out.println("zweiter Test");
new File(f, testname).delete();
k = new kelondroFlexWidthArray(f, "flextest", rowdef);
kelondroFlexWidthArray.delete(f, testname);
//k = kelondroFlexWidthArray.open(f, "flextest", rowdef);
for (int i = 1; i <= 20; i = i * 2) {
System.out.println("LOOP: " + i);
k = kelondroFlexWidthArray.open(f, "flextest", rowdef);
for (int j = 0; j < i*2; j++) {
k.add(k.row().newEntry(new byte[][]{(Integer.toString(i) + "-" + Integer.toString(j)).getBytes(), "xxxx".getBytes()}));
}
k.close();
k = kelondroFlexWidthArray.open(f, "flextest", rowdef);
for (int j = 0; j < i; j++) {
k.remove(j);
k.remove(i*2 - j - 1);
}
k.close();
}
k = kelondroFlexWidthArray.open(f, "flextest", rowdef);
k.print();
k.col[0].print(true);
k.close();

@ -524,14 +524,14 @@ public class kelondroRecords {
}
protected final void deleteNode(Handle handle) throws IOException {
if (cacheSize != 0) {
if (cacheSize == 0) {
dispose(handle);
} else {
synchronized (cacheHeaders) {
cacheHeaders.removeb(handle.index);
cacheDelete++;
dispose(handle);
}
} else {
dispose(handle);
}
}
@ -980,11 +980,16 @@ public class kelondroRecords {
// delete element with handle h
// this element is then connected to the deleted-chain and can be
// re-used change counter
long sp = seekpos(h);
if (sp >= entryFile.length()) {
// a deletion of a node that cannot exist is wrong
throw new IOException("dispose: handle position " + h.index + "/" + sp + " exceeds file size " + entryFile.length());
}
synchronized (USAGE) {
USAGE.USEDC--;
USAGE.FREEC++;
// change pointer
entryFile.writeInt(seekpos(h), USAGE.FREEH.index); // extend free-list
entryFile.writeInt(sp, USAGE.FREEH.index); // extend free-list
// write new FREEH Handle link
USAGE.FREEH = h;
USAGE.write();

@ -88,7 +88,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
// init noticeUrlDB
this.log.logInfo("Initializing the source noticeUrlDB");
this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime);
this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime, false);
this.importStartSize = this.importNurlDB.size();
//int stackSize = this.importNurlDB.stackSize();

@ -77,7 +77,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
this.log.logFine("Initializing source word index db.");
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false));
this.log.logFine("Initializing import URL db.");
this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2);
this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2, false);
this.importStartSize = this.importWordIndex.size();
}

@ -59,6 +59,7 @@ import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.tools.bitfield;
public class plasmaCrawlEURL extends indexURL {
@ -123,7 +124,7 @@ public class plasmaCrawlEURL extends indexURL {
* ======================================================================= */
private LinkedList rejectedStack = new LinkedList(); // strings: url
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) {
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
kelondroRow rowdef = new kelondroRow(
"String urlhash-" + urlHashLength + ", " + // the url's hash
@ -138,32 +139,20 @@ public class plasmaCrawlEURL extends indexURL {
"String failcause-" + urlErrorLength + ", " + // string describing load failure
"byte[] flags-" + urlFlagLength); // extra space
String newCacheName = "urlErr3.table";
cachePath.mkdirs();
try {
urlHashCache = new kelondroFlexTable(cachePath, newCacheName, kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
/*
File oldCacheFile = new File(cachePath, "urlErr0.db");
if (oldCacheFile.exists()) try {
// open existing cache
kelondroTree tree = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
tree.assignRowdef(rowdef);
urlHashCache = tree;
} catch (IOException e) {
oldCacheFile.delete();
urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
if (newdb) {
String newCacheName = "urlErr3.table";
cachePath.mkdirs();
try {
urlHashCache = new kelondroFlexTable(cachePath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
} else {
// create new cache
File oldCacheFile = new File(cachePath, "urlErr0.db");
oldCacheFile.getParentFile().mkdirs();
urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef);
}
*/
}
public synchronized Entry newEntry(URL url, String referrer, String initiator, String executor,

@ -96,7 +96,7 @@ public final class plasmaCrawlLURL extends indexURL {
//public static Set damagedURLS = Collections.synchronizedSet(new HashSet());
public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime) {
public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
kelondroRow rowdef = new kelondroRow(
"String urlhash-" + urlHashLength + ", " + // the url's hash
@ -962,7 +962,7 @@ public final class plasmaCrawlLURL extends indexURL {
} catch (MalformedURLException e) {}
if (args[0].equals("-l")) try {
// arg 1 is path to URLCache
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1, 0);
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1, 0, false);
final Iterator enu = urls.entries(true, false, null);
while (enu.hasNext()) {
((Entry) enu.next()).print();

@ -55,6 +55,7 @@ import java.util.Iterator;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroStack;
import de.anomic.kelondro.kelondroRow;
@ -102,9 +103,10 @@ public class plasmaCrawlNURL extends indexURL {
private File cacheStacksPath;
private int bufferkb;
private long preloadTime;
private boolean newdb;
initStackIndex initThead;
public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime) {
public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
super();
this.cacheStacksPath = cachePath;
this.bufferkb = bufferkb;
@ -112,7 +114,7 @@ public class plasmaCrawlNURL extends indexURL {
// create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path
this.newdb = newdb;
openHashCache();
File coreStackFile = new File(cachePath, "urlNoticeLocal0.stack");
@ -147,21 +149,20 @@ public class plasmaCrawlNURL extends indexURL {
}
private void openHashCache() {
/*
String newCacheName = "urlNotice3.table";
cacheStacksPath.mkdirs();
try {
urlHashCache = new kelondroFlexTable(cacheStacksPath, newCacheName, kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
if (newdb) {
String newCacheName = "urlNotice4.table";
cacheStacksPath.mkdirs();
try {
urlHashCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
} else {
File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db");
oldCacheFile.getParentFile().mkdirs();
urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef);
}
*/
File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db");
oldCacheFile.getParentFile().mkdirs();
urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef);
}
private void resetHashCache() {

@ -62,6 +62,8 @@ import de.anomic.http.httpc;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
@ -81,10 +83,10 @@ public final class plasmaCrawlStacker {
//private boolean stopped = false;
private stackCrawlQueue queue;
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime) {
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, boolean newdb) {
this.sb = sb;
this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime);
this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, newdb);
this.log.logInfo(this.queue.size() + " entries in the stackCrawl queue.");
this.log.logInfo("STACKCRAWL thread initialized.");
@ -578,12 +580,13 @@ public final class plasmaCrawlStacker {
private final serverSemaphore readSync;
private final serverSemaphore writeSync;
private final LinkedList urlEntryHashCache;
private kelondroTree urlEntryCache;
private kelondroIndex urlEntryCache;
private File cacheStacksPath;
private int bufferkb;
private long preloadTime;
private boolean newdb;
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) {
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boolean newdb) {
// init the read semaphore
this.readSync = new serverSemaphore (0);
@ -597,6 +600,7 @@ public final class plasmaCrawlStacker {
this.cacheStacksPath = cacheStacksPath;
this.bufferkb = bufferkb;
this.preloadTime = preloadTime;
this.newdb = newdb;
openDB();
try {
@ -639,25 +643,43 @@ public final class plasmaCrawlStacker {
/**
 * Opens the store behind urlEntryCache.
 *
 * When newdb is set, a kelondroFlexTable is opened below cacheStacksPath; an
 * IOException there prints the stack trace and terminates the JVM with
 * status -1. Otherwise the kelondroTree file "urlPreNotice.db" is opened.
 * Exactly one store is opened per call.
 */
private void openDB() {
    if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path
    if (this.newdb) {
        // NOTE(review): "urPreNotice1.table" looks like a typo for "urlPreNotice1.table"
        // (compare "urlPreNotice.db" below); left unchanged because renaming it would
        // change the on-disk location of existing data -- confirm before fixing
        String newCacheName = "urPreNotice1.table";
        cacheStacksPath.mkdirs();
        try {
            this.urlEntryCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
    } else {
        File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
        cacheFile.getParentFile().mkdirs();
        this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef);
    }
}
public int cacheNodeChunkSize() {
return urlEntryCache.cacheNodeChunkSize();
if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeChunkSize();
return 0;
}
public int cacheObjectChunkSize() {
return urlEntryCache.cacheObjectChunkSize();
public int[] cacheNodeStatus() {
if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeStatus();
return new int[]{0,0,0,0,0,0,0,0,0,0};
}
public int[] cacheNodeStatus() {
return urlEntryCache.cacheNodeStatus();
public int cacheObjectChunkSize() {
if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectChunkSize();
return 0;
}
public long[] cacheObjectStatus() {
return urlEntryCache.cacheObjectStatus();
if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectStatus();
return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
}
public void close() throws IOException {

@ -260,7 +260,7 @@ public class plasmaRankingCRProcess {
kelondroCollectionIndex newseq = null;
if (newdb) {
File path = to_file.getParentFile(); // path to storage place
newacc = new kelondroFlexTable(path, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow);
newacc = new kelondroFlexTable(path, CRG_accname, 128 * 1024 * 1024, -1, CRG_accrow, kelondroBase64Order.enhancedCoder);
newseq = new kelondroCollectionIndex(path, CRG_seqname, 12, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, 2, CRG_colrow);
} else {
if (!(to_file.exists())) {

@ -405,7 +405,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start indexing management
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time);
urlPool = new plasmaURLPool(plasmaPath,
ramLURL, getConfigBool("useFlexTableForLURL", false),
ramNURL, getConfigBool("useFlexTableForNURL", false),
ramEURL, getConfigBool("useFlexTableForEURL", true),
ramLURL_time);
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
// set a high maximum cache size to current size; this is adopted later automatically
@ -583,7 +587,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
serverInstantThread.oneTimeJob(yc, "loadSeeds", yacyCore.log, 3000);
// initializing the stackCrawlThread
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time);
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, getConfigBool("useFlexTableForPreNURL", false));
//this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL);
//this.sbStackCrawlThread.start();

@ -57,10 +57,14 @@ public class plasmaURLPool {
public final plasmaCrawlNURL noticeURL;
public final plasmaCrawlEURL errorURL;
public plasmaURLPool(File plasmaPath, int ramLURL, int ramNURL, int ramEURL, long preloadTime) {
loadedURL = new plasmaCrawlLURL(plasmaPath, ramLURL, preloadTime);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1);
errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1);
public plasmaURLPool(File plasmaPath,
int ramLURL, boolean newLURL,
int ramNURL, boolean newNURL,
int ramEURL, boolean newEURL,
long preloadTime) {
loadedURL = new plasmaCrawlLURL(plasmaPath, ramLURL, preloadTime, newLURL);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1, newNURL);
errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1, newEURL);
}
public String exists(String hash) {

@ -700,10 +700,10 @@ public final class yacy {
// db containing all currently loades urls
int cache = dbcache * 1024; // in KB
log.logFine("URLDB-Caches: "+cache+" bytes");
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), cache, 10000);
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), cache, 10000, false);
// db used to hold all neede urls
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), cache, 10000);
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), cache, 10000, false);
Runtime rt = Runtime.getRuntime();
int cacheMem = (int)((serverMemory.max-rt.totalMemory())/1024)-(2*cache + 8*1024);
@ -940,7 +940,7 @@ public final class yacy {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000, 10000);
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, false, 1000, false, 1000, false, 10000);
HashMap doms = new HashMap();
System.out.println("Started domain list extraction from " + pool.loadedURL.size() + " url entries.");
System.out.println("a dump will be written after double-check of all extracted domains.");
@ -1055,7 +1055,7 @@ public final class yacy {
private static void urllist(String homePath, String source, boolean html, String targetName) {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000, 10000);
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, false, 1000, false, 1000, false, 10000);
File file = new File(root, targetName);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
@ -1133,7 +1133,7 @@ public final class yacy {
serverLog log = new serverLog("URLDBCLEANUP");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
try {
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000);
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000, false);
currentUrlDB.urldbcleanup();
currentUrlDB.close();
} catch (IOException e) {

@ -797,3 +797,6 @@ currentSkin=
# temporary flags for the new database structures. set these to true only for testing
# ALL DATA THAT IS CREATED WITH THESE FLAGS ON WILL BE VOID IN A FINAL VERSION
useCollectionIndex=false
useFlexTableForNURL=false
useFlexTableForEURL=true
useFlexTableForPreNURL=false

Loading…
Cancel
Save