From e1f67262f7126a5d66c2d4bf03b90f702bf76354 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 18 Sep 2008 14:12:15 +0000 Subject: [PATCH] - added and removed some debugging output - fixed a bug with merge method - patched wrong output of language identification (not fixed, only patched!) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5181 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/Balancer.java | 8 +++--- .../de/anomic/kelondro/kelondroEcoTable.java | 25 +++++++++++-------- .../kelondro/kelondroMergeIterator.java | 11 +++++--- source/de/anomic/plasma/plasmaWordIndex.java | 1 + source/de/anomic/server/serverMemory.java | 2 ++ 5 files changed, 29 insertions(+), 18 deletions(-) diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 708ec359e..46f40c825 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -365,7 +365,7 @@ public class Balancer { public synchronized void push(final CrawlEntry entry) throws IOException { assert entry != null; if (urlFileIndex.has(entry.url().hash().getBytes())) { - serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed"); + serverLog.logWarning("BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed"); return; } @@ -534,7 +534,7 @@ public class Balancer { // emergency case: this means that something with the stack organization is wrong // the file appears to be broken. We kill the file. kelondroStack.reset(urlFileStack); - serverLog.logSevere("PLASMA BALANCER", "get() failed to fetch entry from file stack. reset stack file."); + serverLog.logSevere("BALANCER", "get() failed to fetch entry from file stack. 
reset stack file."); } else { final String nexthash = new String(nextentry.getColBytes(0)); @@ -554,7 +554,7 @@ public class Balancer { // check case where we did not found anything if (result == null) { - serverLog.logSevere("PLASMA BALANCER", "get() was not able to find a valid urlhash - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size()); + serverLog.logSevere("BALANCER", "get() was not able to find a valid urlhash - total size = " + size() + ", fileStack.size() = " + urlFileStack.size() + ", ramStack.size() = " + urlRAMStack.size() + ", domainStacks.size() = " + domainStacks.size()); return null; } @@ -573,7 +573,7 @@ public class Balancer { // in best case, this should never happen if the balancer works propertly // this is only to protection against the worst case, where the crawler could // behave in a DoS-manner - serverLog.logInfo("PLASMA BALANCER", "forcing fetch delay of " + sleeptime + " millisecond for " + crawlEntry.url().getHost()); + serverLog.logInfo("BALANCER", "forcing fetch delay of " + sleeptime + " millisecond for " + crawlEntry.url().getHost()); try {synchronized(this) { this.wait(sleeptime); }} catch (final InterruptedException e) {} } diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java index 86e2522c0..6987ab431 100644 --- a/source/de/anomic/kelondro/kelondroEcoTable.java +++ b/source/de/anomic/kelondro/kelondroEcoTable.java @@ -39,6 +39,7 @@ import java.util.TreeSet; import de.anomic.kelondro.kelondroRow.Entry; import de.anomic.server.serverMemory; +import de.anomic.server.logging.serverLog; /* * The EcoIndex builds upon the EcoFS and tries to reduce the number of IO requests that the @@ -106,21 +107,21 @@ public class kelondroEcoTable implements kelondroIndex { ((useTailCache == tailCacheForceUsage) || ((useTailCache == tailCacheUsageAuto) && (serverMemory.free() > 
neededRAM4table + 200 * 1024 * 1024)))) ? new kelondroRowSet(taildef, records) : null; - System.out.println("*** DEBUG " + tablefile + ": available RAM: " + (serverMemory.available() / 1024 / 1024) + "MB, allocating space for " + records + " entries"); + serverLog.logInfo("ECOTABLE", "initialization of " + tablefile + ": available RAM: " + (serverMemory.available() / 1024 / 1024) + "MB, allocating space for " + records + " entries"); final long neededRAM4index = 2 * 1024 * 1024 + records * (rowdef.primaryKeyLength + 4) * 3 / 2; if (!serverMemory.request(neededRAM4index, false)) { // despite calculations seemed to show that there is enough memory for the table AND the index // there is now not enough memory left for the index. So delete the table again to free the memory // for the index - System.out.println("*** DEBUG " + tablefile + ": not enough RAM (" + (serverMemory.available() / 1024 / 1024) + "MB) left for index, deleting allocated table space to enable index space allocation (needed: " + (neededRAM4index / 1024 / 1024) + "MB)"); + serverLog.logSevere("ECOTABLE", tablefile + ": not enough RAM (" + (serverMemory.available() / 1024 / 1024) + "MB) left for index, deleting allocated table space to enable index space allocation (needed: " + (neededRAM4index / 1024 / 1024) + "MB)"); table = null; System.gc(); - System.out.println("*** DEBUG " + tablefile + ": RAM after releasing the table: " + (serverMemory.available() / 1024 / 1024) + "MB"); + serverLog.logSevere("ECOTABLE", tablefile + ": RAM after releasing the table: " + (serverMemory.available() / 1024 / 1024) + "MB"); } index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, records); - System.out.println("*** DEBUG " + tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? "DISABLED" : "ENABLED")); + serverLog.logInfo("ECOTABLE", tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? 
"DISABLED" : "ENABLED")); // read all elements from the file into the copy table - System.out.print("*** initializing RAM index for EcoTable " + tablefile.getName() + ":"); + serverLog.logInfo("ECOTABLE", "initializing RAM index for EcoTable " + tablefile.getName() + ", please wait."); int i = 0; byte[] key; if (table == null) { @@ -133,11 +134,12 @@ public class kelondroEcoTable implements kelondroIndex { if (key == null) {i++; continue;} if (!index.addi(key, i++)) fail++; assert index.size() + fail == i : "index.size() = " + index.size() + ", i = " + i + ", fail = " + fail + ", key = '" + new String(key) + "'"; - + /* if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); } + */ } } else { byte[] record; @@ -154,23 +156,24 @@ public class kelondroEcoTable implements kelondroIndex { // write the tail into the table table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true)); - + /* if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); } + */ } } // check consistency - System.out.print(" -ordering- .."); - System.out.flush(); + //System.out.print(" -ordering- .."); + //System.out.flush(); this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize); final ArrayList doubles = index.removeDoubles(); //assert index.size() + doubles.size() + fail == i; - System.out.println(" -removed " + doubles.size() + " doubles- done."); + //System.out.println(" -removed " + doubles.size() + " doubles- done."); if (doubles.size() > 0) { - System.out.println("DEBUG " + tablefile + ": WARNING - EcoTable " + tablefile + " has " + doubles.size() + " doubles"); + serverLog.logInfo("ECOTABLE", tablefile + ": WARNING - EcoTable " + tablefile + " has " + doubles.size() + " doubles"); // from all the doubles take one, put it back to the index and remove the others from the file // first put back one element each final byte[] record = new byte[rowdef.objectsize]; diff --git 
a/source/de/anomic/kelondro/kelondroMergeIterator.java b/source/de/anomic/kelondro/kelondroMergeIterator.java index e8e83254a..8678c45cc 100644 --- a/source/de/anomic/kelondro/kelondroMergeIterator.java +++ b/source/de/anomic/kelondro/kelondroMergeIterator.java @@ -142,16 +142,21 @@ public class kelondroMergeIterator implements kelondroCloneableIterator { final Class c = Class.forName("de.anomic.kelondro.kelondroMergeIterator"); meth = c.getMethod("mergeEqualByReplace", new Class[]{Object.class, Object.class}); } catch (final SecurityException e) { - System.out.println("Error while initializing simpleMerge: " + e.getMessage()); + System.out.println("Error while initializing simpleMerge (1): " + e.getMessage()); meth = null; } catch (final ClassNotFoundException e) { - System.out.println("Error while initializing simpleMerge: " + e.getMessage()); + System.out.println("Error while initializing simpleMerge (2): " + e.getMessage()); meth = null; } catch (final NoSuchMethodException e) { - System.out.println("Error while initializing simpleMerge: " + e.getMessage()); + System.out.println("Error while initializing simpleMerge (3): " + e.getMessage()); meth = null; } simpleMerge = meth; } + // do not remove the following method: it is not referenced anywhere directly, but is invoked indirectly via reflection + // please see initialization of simpleMerge above + public static Object mergeEqualByReplace(final Object a, final Object b) { + return a; + } } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 205e56ad8..70f60ed3b 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -824,6 +824,7 @@ public final class plasmaWordIndex implements indexRI { language = entry.url().language(); } else { System.out.println("*** DEBUG LANGUAGE: identification of " + entry.url() + " SUCCESS: " + language); + if (language.equals("pl")) language = entry.url().language(); // patch a
bug TODO: remove this if bug is fixed } // create a new loaded URL db entry diff --git a/source/de/anomic/server/serverMemory.java b/source/de/anomic/server/serverMemory.java index ba3726e23..485f1ba66 100644 --- a/source/de/anomic/server/serverMemory.java +++ b/source/de/anomic/server/serverMemory.java @@ -50,7 +50,9 @@ public class serverMemory { final long elapsed = System.currentTimeMillis() - lastGC; if (elapsed > last) { final long free = free(); + System.out.println("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"); System.gc(); + System.out.println("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ if you see this many times please report to forum"); lastGC = System.currentTimeMillis(); if (log.isFine()) log.logInfo("[gc] before: " + bytesToString(free) + ", after: " + bytesToString(free()) + ", call: " + info); } else if (log.isFine()) {