From 6396f5971ef06fa97cca6d2900690c58f3c1a4a2 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 11 Oct 2006 00:46:45 +0000 Subject: [PATCH] bugfixes and migration attempt toward new kelondroFlex db - more synchronization - bugfix for remove in collections - bugfix in kelondroFlex (wrong exception condition!) - options to use RAM, FLEX and TREE tables for Crawl URL stacker - default for Crawl URL stacker is now FLEX (!) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2746 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 10 ++--- .../anomic/kelondro/kelondroBytesIntMap.java | 10 ++--- .../kelondro/kelondroCollectionIndex.java | 4 +- .../kelondro/kelondroFixedWidthArray.java | 2 +- .../de/anomic/kelondro/kelondroFlexTable.java | 2 +- .../de/anomic/kelondro/kelondroRAMIndex.java | 10 ++--- .../de/anomic/plasma/plasmaCrawlStacker.java | 37 +++++++++++++------ .../de/anomic/plasma/plasmaSwitchboard.java | 2 +- yacy.init | 5 ++- 9 files changed, 49 insertions(+), 33 deletions(-) diff --git a/build.properties b/build.properties index 96c804a04..8b097ed2d 100644 --- a/build.properties +++ b/build.properties @@ -3,11 +3,11 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.48 -#releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz -releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz -#releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} -releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr} +releaseVersion=0.481 +releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz +#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz +releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} +#releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr} releaseFileParentDir=yacy releaseNr=$Revision$ diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index da134cdde..c476ca2e8 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -37,13 +37,13 @@ public class kelondroBytesIntMap { this.ki = ki; } - public int geti(byte[] key) throws IOException { + public synchronized int geti(byte[] key) throws IOException { kelondroRow.Entry indexentry = ki.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } - public int puti(byte[] key, int i) throws IOException { + public synchronized int puti(byte[] key, int i) throws IOException { kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); @@ -52,18 +52,18 @@ public class kelondroBytesIntMap { return (int) oldentry.getColLong(1); } - public int removei(byte[] key) throws IOException { + public synchronized int removei(byte[] key) throws IOException { if (ki.size() == 0) return -1; kelondroRow.Entry indexentry = ki.remove(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } - public int size() throws IOException { + public synchronized int size() throws IOException { return ki.size(); } - public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { // returns the row-iterator of the underlying kelondroIndex // col[0] = key // col[1] = integer as {b265} diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index cb1528010..6dbaa2e2e 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -408,7 +408,9 @@ public class kelondroCollectionIndex { synchronized (index) { kelondroRow.Entry indexrow = index.get(key); if (indexrow == null) return null; - return getdelete(indexrow, true, false); + kelondroRowSet removedCollection = getdelete(indexrow, true, false); + index.remove(key); + return removedCollection; } } diff --git a/source/de/anomic/kelondro/kelondroFixedWidthArray.java b/source/de/anomic/kelondro/kelondroFixedWidthArray.java index bebbf5b17..a44f63c66 100644 --- a/source/de/anomic/kelondro/kelondroFixedWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFixedWidthArray.java @@ -128,7 +128,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro } public synchronized void remove(int index) throws IOException { - if (index >= size()) throw new IOException("remove: index " + index + " out of bounds " + size()); + if (index >= super.USAGE.allCount()) throw new IOException("remove: index " + index + " out of bounds " + super.USAGE.allCount()); // get the node at position index Handle h = new Handle(index); diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 734b1a893..d171cab8c 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -155,7 +155,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return r; } - public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { return new rowIterator(up, rotating, firstKey); } diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index 0c36da603..df0acd6d0 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -47,7 +47,7 @@ public class kelondroRAMIndex implements kelondroIndex { return this.order; } - public int size() { + public synchronized int size() { return this.index.size(); } @@ -55,19 +55,19 @@ public class kelondroRAMIndex implements kelondroIndex { return this.rowdef; } - public Entry get(byte[] key) { + public synchronized Entry get(byte[] key) { return (kelondroRow.Entry) index.get(key); } - public Entry put(Entry row) { + public synchronized Entry put(Entry row) { return (kelondroRow.Entry) index.put(row.getColBytes(0), row); } - public Entry remove(byte[] key) { + public synchronized Entry remove(byte[] key) { return (kelondroRow.Entry) index.remove(key); } - public Iterator rows(boolean up, boolean rotating, byte[] firstKey) { + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) { return index.values().iterator(); } diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index b61e6eb09..52171a235 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -64,6 +64,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; +import de.anomic.kelondro.kelondroRAMIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.net.URL; @@ -75,6 +76,11 @@ import de.anomic.yacy.yacyCore; public final class plasmaCrawlStacker { + // keys for different database types + public static final int QUEUE_DB_TYPE_RAM = 0; + public static final int QUEUE_DB_TYPE_TREE = 1; + public static final int QUEUE_DB_TYPE_FLEX = 2; + final WorkerPool theWorkerPool; private GenericObjectPool.Config theWorkerPoolConfig = null; final ThreadGroup theWorkerThreadGroup = new ThreadGroup("stackCrawlThreadGroup"); @@ -83,10 +89,10 @@ public final class plasmaCrawlStacker { //private boolean stopped = false; private stackCrawlQueue queue; - public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, boolean newdb) { + public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, int dbtype) { this.sb = sb; - this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, newdb); + this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, dbtype); this.log.logInfo(this.queue.size() + " entries in the stackCrawl queue."); this.log.logInfo("STACKCRAWL thread initialized."); @@ -248,7 +254,7 @@ public final class plasmaCrawlStacker { public String stackCrawl(String nexturlString, String referrerString, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) throws InterruptedException { // stacks a crawl item. The position can also be remote // returns null if successful, a reason string if not successful - this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'"); + //this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'"); long startTime = System.currentTimeMillis(); String reason = null; // failure reason @@ -384,8 +390,7 @@ public final class plasmaCrawlStacker { boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder()); if ((dbocc != null) && (!(recrawl))) { reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")"; - this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + - "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); + //this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); return reason; } @@ -597,9 +602,9 @@ public final class plasmaCrawlStacker { private File cacheStacksPath; private int bufferkb; private long preloadTime; - private boolean newdb; + private int dbtype; - public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boolean newdb) { + public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, int dbtype) { // init the read semaphore this.readSync = new serverSemaphore (0); @@ -613,7 +618,7 @@ public final class plasmaCrawlStacker { this.cacheStacksPath = cacheStacksPath; this.bufferkb = bufferkb; this.preloadTime = preloadTime; - this.newdb = newdb; + this.dbtype = dbtype; openDB(); try { @@ -650,9 +655,13 @@ public final class plasmaCrawlStacker { } private void deleteDB() { - if (this.newdb) { + if (this.dbtype == QUEUE_DB_TYPE_RAM) { + // do nothing.. + } + if (this.dbtype == QUEUE_DB_TYPE_FLEX) { kelondroFlexTable.delete(cacheStacksPath, "urlPreNotice1.table"); - } else { + } + if (this.dbtype == QUEUE_DB_TYPE_TREE) { File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); cacheFile.delete(); } @@ -661,7 +670,10 @@ public final class plasmaCrawlStacker { private void openDB() { if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path - if (this.newdb) { + if (this.dbtype == QUEUE_DB_TYPE_RAM) { + this.urlEntryCache = new kelondroRAMIndex(kelondroBase64Order.enhancedCoder, plasmaCrawlNURL.rowdef); + } + if (this.dbtype == QUEUE_DB_TYPE_FLEX) { String newCacheName = "urlPreNotice1.table"; cacheStacksPath.mkdirs(); try { @@ -670,7 +682,8 @@ public final class plasmaCrawlStacker { e.printStackTrace(); System.exit(-1); } - } else { + } + if (this.dbtype == QUEUE_DB_TYPE_TREE) { File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); cacheFile.getParentFile().mkdirs(); this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index f5990ebaf..bc9ed397c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -606,7 +606,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser serverInstantThread.oneTimeJob(yc, "loadSeeds", yacyCore.log, 3000); // initializing the stackCrawlThread - this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, getConfigBool("useFlexTableForPreNURL", false)); + this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, (int) getConfigLong("tableTypeForPreNURL", 0)); //this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL); //this.sbStackCrawlThread.start(); diff --git a/yacy.init b/yacy.init index 5171fa40d..9c6b7d9d0 100644 --- a/yacy.init +++ b/yacy.init @@ -497,7 +497,7 @@ xpstopw=true 80_indexing_busysleep=100 80_indexing_memprereq=2097152 82_crawlstack_idlesleep=5000 -82_crawlstack_busysleep=10 +82_crawlstack_busysleep=0 82_crawlstack_memprereq=1048576 90_cleanup_idlesleep=300000 90_cleanup_busysleep=300000 @@ -815,10 +815,11 @@ currentSkin= # temporary flag for new database structure. set only true for testing # ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION +# table-types: RAM = 0, TREE = 1, FLEX = 2; useCollectionIndex=false useFlexTableForNURL=false useFlexTableForEURL=true -useFlexTableForPreNURL=false +tableTypeForPreNURL=2 # flag to show surftipps on index.html page showSurftipps = true