From b7f4a1521bb68baa0d347ba05d0da73dba1e5f07 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 24 Aug 2006 22:21:22 +0000 Subject: [PATCH] added options to switch on or off the kelondroFlexTable for NURL, EURL and PreNURL git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2456 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- doc/Download.html | 6 +-- source/dbtest.java | 2 +- .../kelondro/kelondroCollectionIndex.java | 4 +- .../kelondro/kelondroFixedWidthArray.java | 16 ++++++ .../de/anomic/kelondro/kelondroFlexTable.java | 13 ++--- .../kelondro/kelondroFlexWidthArray.java | 52 ++++++++++++++++--- .../de/anomic/kelondro/kelondroRecords.java | 13 +++-- .../dbImport/plasmaCrawlNURLImporter.java | 2 +- .../plasma/dbImport/plasmaDbImporter.java | 2 +- source/de/anomic/plasma/plasmaCrawlEURL.java | 37 +++++-------- source/de/anomic/plasma/plasmaCrawlLURL.java | 4 +- source/de/anomic/plasma/plasmaCrawlNURL.java | 33 ++++++------ .../de/anomic/plasma/plasmaCrawlStacker.java | 50 +++++++++++++----- .../anomic/plasma/plasmaRankingCRProcess.java | 2 +- .../de/anomic/plasma/plasmaSwitchboard.java | 8 ++- source/de/anomic/plasma/plasmaURLPool.java | 12 +++-- source/yacy.java | 10 ++-- yacy.init | 3 ++ 18 files changed, 173 insertions(+), 96 deletions(-) diff --git a/doc/Download.html b/doc/Download.html index 7fbb27dc5..dd3587d04 100644 --- a/doc/Download.html +++ b/doc/Download.html @@ -60,12 +60,12 @@ Nightly builds from compiles out of SVN can be obtained from yacy_v0.46_20060823_2442.tar.gz -
  • from BerliOS.de : yacy_v0.45_20060501_2046.tar.gz


  • +
  • from BerliOS.de : yacy_v0.46_20060823_2442.tar.gz


  • Windows-flavour release of YaCy (same code as generic release, but with convenient Windows-Installer):
  • diff --git a/source/dbtest.java b/source/dbtest.java index e11178027..2508ac466 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -184,7 +184,7 @@ public class dbtest { } if (dbe.equals("kelondroFlexTable")) { File tablepath = new File(tablename).getParentFile(); - table = new kelondroFlexTable(tablepath, new File(tablename).getName(), kelondroBase64Order.enhancedCoder, buffer, preload, testRow); + table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, preload, testRow, kelondroBase64Order.enhancedCoder); } if (dbe.equals("mysql")) { table = new dbTable("mysql", testRow); diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 1976d6aa2..4c515357c 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -91,7 +91,7 @@ public class kelondroCollectionIndex { this.loadfactor = loadfactor; // create index table - index = new kelondroFlexTable(path, filenameStub + ".index.table", indexOrder, buffersize, preloadTime, indexRow(keyLength)); + index = new kelondroFlexTable(path, filenameStub + ".index.table", buffersize, preloadTime, indexRow(keyLength), indexOrder); // save/check property file for this array File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize()); @@ -455,7 +455,7 @@ public class kelondroCollectionIndex { collectionIndex.close(); // printout of index - kelondroFlexTable index = new kelondroFlexTable(path, filenameStub + ".index", kelondroNaturalOrder.naturalOrder, buffersize, preloadTime, indexRow(9)); + kelondroFlexTable index = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(9), kelondroNaturalOrder.naturalOrder); index.print(); index.close(); } catch (IOException e) { diff --git a/source/de/anomic/kelondro/kelondroFixedWidthArray.java b/source/de/anomic/kelondro/kelondroFixedWidthArray.java index d8b0c8df4..48ecbece5 100644 --- a/source/de/anomic/kelondro/kelondroFixedWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFixedWidthArray.java @@ -69,6 +69,22 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro } } + public static kelondroFixedWidthArray open(File file, kelondroRow rowdef, int intprops) { + try { + return new kelondroFixedWidthArray(file, rowdef, intprops); + } catch (IOException e) { + file.delete(); + try { + return new kelondroFixedWidthArray(file, rowdef, intprops); + } catch (IOException ee) { + e.printStackTrace(); + ee.printStackTrace(); + System.exit(-1); + return null; + } + } + } + public synchronized kelondroRow.Entry set(int index, kelondroRow.Entry rowentry) throws IOException { // make room for element diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index d935b8eaa..734b1a893 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -32,8 +32,8 @@ import java.util.Iterator; public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { protected kelondroBytesIntMap index; - - public kelondroFlexTable(File path, String tablename, kelondroOrder objectOrder, long buffersize, long preloadTime, kelondroRow rowdef) throws IOException { + + public kelondroFlexTable(File path, String tablename, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder) throws IOException { super(path, tablename, rowdef); File newpath = new File(path, tablename); File indexfile = new File(newpath, "col.000.index"); @@ -129,35 +129,30 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } public synchronized kelondroRow.Entry get(byte[] key) throws IOException { - synchronized (index) { int i = index.geti(key); if (i < 0) return null; // i may be greater than this.size(), because this table may have deleted entries // the deleted entries are subtracted from the 'real' tablesize, so the size may be // smaller than an index to a row entry return super.get(i); - } } public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { - synchronized (index) { int i = index.geti(row.getColBytes(0)); if (i < 0) { index.puti(row.getColBytes(0), super.add(row)); return null; } return super.set(i, row); - } } public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { - synchronized (index) { int i = index.removei(key); if (i < 0) return null; - kelondroRow.Entry r = super.get(i); + kelondroRow.Entry r; + r = super.get(i); super.remove(i); return r; - } } public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroFlexWidthArray.java b/source/de/anomic/kelondro/kelondroFlexWidthArray.java index 3047ff00b..5c80c51bf 100644 --- a/source/de/anomic/kelondro/kelondroFlexWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFlexWidthArray.java @@ -104,8 +104,41 @@ public class kelondroFlexWidthArray implements kelondroArray { } } + public static void delete(File path, String tablename) { + File tabledir = new File(path, tablename); + if ((tabledir.exists()) && (!(tabledir.isDirectory()))) { + tabledir.delete(); + return; + } + + String[] files = tabledir.list(); + for (int i = 0; i < files.length; i++) { + new File(tabledir, files[i]).delete(); + } + + tabledir.delete(); + } + + public static kelondroFlexWidthArray open(File path, String tablename, kelondroRow rowdef) { + try { + return new kelondroFlexWidthArray(path, tablename, rowdef); + } catch (IOException e) { + kelondroFlexWidthArray.delete(path, tablename); + try { + return new kelondroFlexWidthArray(path, tablename, rowdef); + } catch (IOException ee) { + e.printStackTrace(); + ee.printStackTrace(); + System.exit(-1); + return null; + } + } + } + public void close() throws IOException { - for (int i = 0; i < col.length; i++) if (col[i] != null) col[i].close(); + synchronized (col) { + for (int i = 0; i < col.length; i++) if (col[i] != null) col[i].close(); + } } protected static final String colfilename(int start, int end) { @@ -222,9 +255,8 @@ public class kelondroFlexWidthArray implements kelondroArray { String testname = "flextest"; try { System.out.println("erster Test"); - new File(f, testname).delete(); - - kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", rowdef); + kelondroFlexWidthArray.delete(f, testname); + kelondroFlexWidthArray k = kelondroFlexWidthArray.open(f, "flextest", rowdef); k.add(k.row().newEntry(new byte[][]{"a".getBytes(), "xxxx".getBytes()})); k.add(k.row().newEntry(new byte[][]{"b".getBytes(), "xxxx".getBytes()})); k.remove(0); @@ -243,16 +275,22 @@ public class kelondroFlexWidthArray implements kelondroArray { System.out.println("zweiter Test"); - new File(f, testname).delete(); - k = new kelondroFlexWidthArray(f, "flextest", rowdef); + kelondroFlexWidthArray.delete(f, testname); + //k = kelondroFlexWidthArray.open(f, "flextest", rowdef); for (int i = 1; i <= 20; i = i * 2) { + System.out.println("LOOP: " + i); + k = kelondroFlexWidthArray.open(f, "flextest", rowdef); for (int j = 0; j < i*2; j++) { k.add(k.row().newEntry(new byte[][]{(Integer.toString(i) + "-" + Integer.toString(j)).getBytes(), "xxxx".getBytes()})); } + k.close(); + k = kelondroFlexWidthArray.open(f, "flextest", rowdef); for (int j = 0; j < i; j++) { - k.remove(j); + k.remove(i*2 - j - 1); } + k.close(); } + k = kelondroFlexWidthArray.open(f, "flextest", rowdef); k.print(); k.col[0].print(true); k.close(); diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index c6adccd56..71cd9fbf5 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -524,14 +524,14 @@ public class kelondroRecords { } protected final void deleteNode(Handle handle) throws IOException { - if (cacheSize != 0) { + if (cacheSize == 0) { + dispose(handle); + } else { synchronized (cacheHeaders) { cacheHeaders.removeb(handle.index); cacheDelete++; dispose(handle); } - } else { - dispose(handle); } } @@ -980,11 +980,16 @@ public class kelondroRecords { // delete element with handle h // this element is then connected to the deleted-chain and can be // re-used change counter + long sp = seekpos(h); + if (sp >= entryFile.length()) { + // a deletion of a node that cannot exist is wrong + throw new IOException("dispose: handle position " + h.index + "/" + sp + " exceeds file size " + entryFile.length()); + } synchronized (USAGE) { USAGE.USEDC--; USAGE.FREEC++; // change pointer - entryFile.writeInt(seekpos(h), USAGE.FREEH.index); // extend free-list + entryFile.writeInt(sp, USAGE.FREEH.index); // extend free-list // write new FREEH Handle link USAGE.FREEH = h; USAGE.write(); diff --git a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java index 258956b47..391d1752f 100644 --- a/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaCrawlNURLImporter.java @@ -88,7 +88,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor // init noticeUrlDB this.log.logInfo("Initializing the source noticeUrlDB"); - this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime); + this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime, false); this.importStartSize = this.importNurlDB.size(); //int stackSize = this.importNurlDB.stackSize(); diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java index 69fcd4929..40cb34aa1 100644 --- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java @@ -77,7 +77,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter { this.log.logFine("Initializing source word index db."); this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false)); this.log.logFine("Initializing import URL db."); - this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2); + this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2, false); this.importStartSize = this.importWordIndex.size(); } diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index fdd097907..7c692d271 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -59,6 +59,7 @@ import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroRow; +import de.anomic.kelondro.kelondroTree; import de.anomic.tools.bitfield; public class plasmaCrawlEURL extends indexURL { @@ -123,7 +124,7 @@ public class plasmaCrawlEURL extends indexURL { * ======================================================================= */ private LinkedList rejectedStack = new LinkedList(); // strings: url - public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) { + public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) { super(); kelondroRow rowdef = new kelondroRow( "String urlhash-" + urlHashLength + ", " + // the url's hash @@ -138,32 +139,20 @@ public class plasmaCrawlEURL extends indexURL { "String failcause-" + urlErrorLength + ", " + // string describing load failure "byte[] flags-" + urlFlagLength); // extra space - - String newCacheName = "urlErr3.table"; - cachePath.mkdirs(); - try { - urlHashCache = new kelondroFlexTable(cachePath, newCacheName, kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef); - } catch (IOException e) { - e.printStackTrace(); - System.exit(-1); - } - - /* - File oldCacheFile = new File(cachePath, "urlErr0.db"); - if (oldCacheFile.exists()) try { - // open existing cache - kelondroTree tree = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); - tree.assignRowdef(rowdef); - urlHashCache = tree; - } catch (IOException e) { - oldCacheFile.delete(); - urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); + if (newdb) { + String newCacheName = "urlErr3.table"; + cachePath.mkdirs(); + try { + urlHashCache = new kelondroFlexTable(cachePath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } } else { - // create new cache + File oldCacheFile = new File(cachePath, "urlErr0.db"); oldCacheFile.getParentFile().mkdirs(); - urlHashCache = new kelondroTree(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); + urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); } - */ } public synchronized Entry newEntry(URL url, String referrer, String initiator, String executor, diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index ccbe80c8b..f5add6e87 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -96,7 +96,7 @@ public final class plasmaCrawlLURL extends indexURL { //public static Set damagedURLS = Collections.synchronizedSet(new HashSet()); - public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime) { + public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) { super(); kelondroRow rowdef = new kelondroRow( "String urlhash-" + urlHashLength + ", " + // the url's hash @@ -962,7 +962,7 @@ public final class plasmaCrawlLURL extends indexURL { } catch (MalformedURLException e) {} if (args[0].equals("-l")) try { // arg 1 is path to URLCache - final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1, 0); + final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1, 0, false); final Iterator enu = urls.entries(true, false, null); while (enu.hasNext()) { ((Entry) enu.next()).print(); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index ed9cf404b..34b754d1c 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -55,6 +55,7 @@ import java.util.Iterator; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; +import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroRow; @@ -102,9 +103,10 @@ public class plasmaCrawlNURL extends indexURL { private File cacheStacksPath; private int bufferkb; private long preloadTime; + private boolean newdb; initStackIndex initThead; - public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime) { + public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) { super(); this.cacheStacksPath = cachePath; this.bufferkb = bufferkb; @@ -112,7 +114,7 @@ public class plasmaCrawlNURL extends indexURL { // create a stack for newly entered entries if (!(cachePath.exists())) cachePath.mkdir(); // make the path - + this.newdb = newdb; openHashCache(); File coreStackFile = new File(cachePath, "urlNoticeLocal0.stack"); @@ -147,21 +149,20 @@ public class plasmaCrawlNURL extends indexURL { } private void openHashCache() { - /* - String newCacheName = "urlNotice3.table"; - cacheStacksPath.mkdirs(); - try { - urlHashCache = new kelondroFlexTable(cacheStacksPath, newCacheName, kelondroBase64Order.enhancedCoder, bufferkb * 0x400, preloadTime, rowdef); - } catch (IOException e) { - e.printStackTrace(); - System.exit(-1); + if (newdb) { + String newCacheName = "urlNotice4.table"; + cacheStacksPath.mkdirs(); + try { + urlHashCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } + } else { + File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db"); + oldCacheFile.getParentFile().mkdirs(); + urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); } - */ - - File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db"); - oldCacheFile.getParentFile().mkdirs(); - urlHashCache = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); - } private void resetHashCache() { diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 9d5b910b8..77537632b 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -62,6 +62,8 @@ import de.anomic.http.httpc; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; +import de.anomic.kelondro.kelondroFlexTable; +import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.plasma.plasmaCrawlEURL; @@ -81,10 +83,10 @@ public final class plasmaCrawlStacker { //private boolean stopped = false; private stackCrawlQueue queue; - public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime) { + public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, boolean newdb) { this.sb = sb; - this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime); + this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, newdb); this.log.logInfo(this.queue.size() + " entries in the stackCrawl queue."); this.log.logInfo("STACKCRAWL thread initialized."); @@ -578,12 +580,13 @@ public final class plasmaCrawlStacker { private final serverSemaphore readSync; private final serverSemaphore writeSync; private final LinkedList urlEntryHashCache; - private kelondroTree urlEntryCache; + private kelondroIndex urlEntryCache; private File cacheStacksPath; private int bufferkb; private long preloadTime; + private boolean newdb; - public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) { + public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boolean newdb) { // init the read semaphore this.readSync = new serverSemaphore (0); @@ -597,6 +600,7 @@ public final class plasmaCrawlStacker { this.cacheStacksPath = cacheStacksPath; this.bufferkb = bufferkb; this.preloadTime = preloadTime; + this.newdb = newdb; openDB(); try { @@ -639,25 +643,43 @@ public final class plasmaCrawlStacker { private void openDB() { if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path - File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); - cacheFile.getParentFile().mkdirs(); - this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef); + + if (this.newdb) { + String newCacheName = "urPreNotice1.table"; + cacheStacksPath.mkdirs(); + try { + this.urlEntryCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } + } else { + + + File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); + cacheFile.getParentFile().mkdirs(); + this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef); + } } - + public int cacheNodeChunkSize() { - return urlEntryCache.cacheNodeChunkSize(); + if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeChunkSize(); + return 0; } - public int cacheObjectChunkSize() { - return urlEntryCache.cacheObjectChunkSize(); + public int[] cacheNodeStatus() { + if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeStatus(); + return new int[]{0,0,0,0,0,0,0,0,0,0}; } - public int[] cacheNodeStatus() { - return urlEntryCache.cacheNodeStatus(); + public int cacheObjectChunkSize() { + if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectChunkSize(); + return 0; } public long[] cacheObjectStatus() { - return urlEntryCache.cacheObjectStatus(); + if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectStatus(); + return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; } public void close() throws IOException { diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java index 92ed39a87..bc7be1b4b 100644 --- a/source/de/anomic/plasma/plasmaRankingCRProcess.java +++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java @@ -260,7 +260,7 @@ public class plasmaRankingCRProcess { kelondroCollectionIndex newseq = null; if (newdb) { File path = to_file.getParentFile(); // path to storage place - newacc = new kelondroFlexTable(path, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow); + newacc = new kelondroFlexTable(path, CRG_accname, 128 * 1024 * 1024, -1, CRG_accrow, kelondroBase64Order.enhancedCoder); newseq = new kelondroCollectionIndex(path, CRG_seqname, 12, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, 2, CRG_colrow); } else { if (!(to_file.exists())) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 891ef2057..42d0c8e6a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -405,7 +405,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // start indexing management log.logConfig("Starting Indexing Management"); - urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time); + urlPool = new plasmaURLPool(plasmaPath, + ramLURL, getConfigBool("useFlexTableForLURL", false), + ramNURL, getConfigBool("useFlexTableForNURL", false), + ramEURL, getConfigBool("useFlexTableForEURL", true), + ramLURL_time); wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false)); // set a high maximum cache size to current size; this is adopted later automatically @@ -583,7 +587,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser serverInstantThread.oneTimeJob(yc, "loadSeeds", yacyCore.log, 3000); // initializing the stackCrawlThread - this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time); + this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, getConfigBool("useFlexTableForPreNURL", false)); //this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL); //this.sbStackCrawlThread.start(); diff --git a/source/de/anomic/plasma/plasmaURLPool.java b/source/de/anomic/plasma/plasmaURLPool.java index 9221f39a2..f2589915c 100644 --- a/source/de/anomic/plasma/plasmaURLPool.java +++ b/source/de/anomic/plasma/plasmaURLPool.java @@ -57,10 +57,14 @@ public class plasmaURLPool { public final plasmaCrawlNURL noticeURL; public final plasmaCrawlEURL errorURL; - public plasmaURLPool(File plasmaPath, int ramLURL, int ramNURL, int ramEURL, long preloadTime) { - loadedURL = new plasmaCrawlLURL(plasmaPath, ramLURL, preloadTime); - noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1); - errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1); + public plasmaURLPool(File plasmaPath, + int ramLURL, boolean newLURL, + int ramNURL, boolean newNURL, + int ramEURL, boolean newEURL, + long preloadTime) { + loadedURL = new plasmaCrawlLURL(plasmaPath, ramLURL, preloadTime, newLURL); + noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1, newNURL); + errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1, newEURL); } public String exists(String hash) { diff --git a/source/yacy.java b/source/yacy.java index 4fb63694f..26c15de87 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -700,10 +700,10 @@ public final class yacy { // db containing all currently loades urls int cache = dbcache * 1024; // in KB log.logFine("URLDB-Caches: "+cache+" bytes"); - plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), cache, 10000); + plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), cache, 10000, false); // db used to hold all neede urls - plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), cache, 10000); + plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.temp.db"), cache, 10000, false); Runtime rt = Runtime.getRuntime(); int cacheMem = (int)((serverMemory.max-rt.totalMemory())/1024)-(2*cache + 8*1024); @@ -940,7 +940,7 @@ public final class yacy { File root = new File(homePath); try { - plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000, 10000); + plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, false, 1000, false, 1000, false, 10000); HashMap doms = new HashMap(); System.out.println("Started domain list extraction from " + pool.loadedURL.size() + " url entries."); System.out.println("a dump will be written after double-check of all extracted domains."); @@ -1055,7 +1055,7 @@ public final class yacy { private static void urllist(String homePath, String source, boolean html, String targetName) { File root = new File(homePath); try { - plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, 1000, 1000, 10000); + plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), 16000, false, 1000, false, 1000, false, 10000); File file = new File(root, targetName); BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file)); @@ -1133,7 +1133,7 @@ public final class yacy { serverLog log = new serverLog("URLDBCLEANUP"); try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try { - plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000); + plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(new File(dbroot, "urlHash.db"), 4194304, 10000, false); currentUrlDB.urldbcleanup(); currentUrlDB.close(); } catch (IOException e) { diff --git a/yacy.init b/yacy.init index 4fc4c02d6..fbcdb95fc 100644 --- a/yacy.init +++ b/yacy.init @@ -797,3 +797,6 @@ currentSkin= # temporary flag for new database structure. set only true for testing # ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION useCollectionIndex=false +useFlexTableForNURL=false +useFlexTableForEURL=true +useFlexTableForPreNURL=false