From 7aa860c505b5d37ffe8c5f6c74120f830f6ce097 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 21 Aug 2010 10:16:05 +0000 Subject: [PATCH] - more logging - more stability for database heap in case of buffer failure git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7058 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/RobotsTxt.java | 4 +- source/de/anomic/search/Switchboard.java | 14 +---- source/net/yacy/kelondro/blob/Heap.java | 56 ++++++++++++++----- source/net/yacy/kelondro/blob/HeapReader.java | 4 +- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index ca28356bd..b37da92a3 100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -67,9 +67,11 @@ public class RobotsTxt { public RobotsTxt(final BEncodedHeap robotsTable) { this.robotsTable = robotsTable; syncObjects = new ConcurrentHashMap(); + log.logInfo("initiated robots table: " + robotsTable.getFile()); } public void clear() { + log.logInfo("clearing robots table"); try { this.robotsTable.clear(); } catch (IOException e) { @@ -174,7 +176,7 @@ public class RobotsTxt { int sz = this.robotsTable.size(); addEntry(robotsTxt4Host); if (this.robotsTable.size() <= sz) { - Log.logSevere("RobotsTxt", "new entry in robots.txt table failed, resetting database"); + log.logSevere("new entry in robots.txt table failed, resetting database"); this.clear(); addEntry(robotsTxt4Host); } diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 7cf670836..8bdbe65ab 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -430,13 +430,10 @@ public final class Switchboard extends serverSwitch { RankingProcess.loadYBR(YBRPath, 15); } - // loading the robots.txt db + // load the robots.txt db this.log.logConfig("Initializing robots.txt DB"); - final File robotsDBFile = new File(queuesRoot, "crawlRobotsTxt.heap"); robots = new RobotsTxt(this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME)); - this.log.logConfig("Loaded robots.txt DB from file " + robotsDBFile.getName() + - ", " + robots.size() + " entries" + - ", " + ppRamString(robotsDBFile.length()/1024)); + this.log.logConfig("Loaded robots.txt DB: " + robots.size() + " entries"); // start a cache manager log.logConfig("Starting HT Cache Manager"); @@ -901,13 +898,6 @@ public final class Switchboard extends serverSwitch { // create new web structure this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map")); - - // load the robots.txt database - this.log.logConfig("Initializing robots.txt DB"); - final File robotsDBFile = new File(this.queuesRoot, "crawlRobotsTxt.heap"); - this.log.logConfig("Loaded robots.txt DB from file " + robotsDBFile.getName() + - ", " + robots.size() + " entries" + - ", " + ppRamString(robotsDBFile.length()/1024)); this.crawlStacker = new CrawlStacker( this.crawlQueues, diff --git a/source/net/yacy/kelondro/blob/Heap.java b/source/net/yacy/kelondro/blob/Heap.java index 2ff14fa04..7243da643 100755 --- a/source/net/yacy/kelondro/blob/Heap.java +++ b/source/net/yacy/kelondro/blob/Heap.java @@ -86,6 +86,7 @@ public final class Heap extends HeapModifier implements BLOB { this.buffermax = buffermax; this.buffer = new TreeMap(ordering); this.buffersize = 0; + Log.logInfo("Heap", "initializing heap " + this.name()); /* // DEBUG Iterator i = index.keys(true, null); @@ -125,7 +126,10 @@ public final class Heap extends HeapModifier implements BLOB { key = normalizeKey(key); synchronized (this) { // check the buffer - if (this.buffer.containsKey(key)) return true; + assert buffer != null; + if (buffer == null) { + if (this.buffer.containsKey(key)) return true; + } return super.has(key); } } @@ -159,6 +163,8 @@ public final class Heap extends HeapModifier implements BLOB { * @throws RowSpaceExceededException */ public void flushBuffer() throws IOException { + assert buffer != null; + if (buffer == null) return; // check size of buffer Iterator> i = this.buffer.entrySet().iterator(); int l = 0; @@ -212,7 +218,8 @@ public final class Heap extends HeapModifier implements BLOB { assert ba.length == posBuffer; // must fit exactly this.file.seek(pos); this.file.write(ba); - this.buffer = nextBuffer; + this.buffer.clear(); + this.buffer.putAll(nextBuffer); this.buffersize = 0; } @@ -228,8 +235,11 @@ public final class Heap extends HeapModifier implements BLOB { synchronized (this) { // check the buffer - byte[] blob = this.buffer.get(key); - if (blob != null) return blob; + assert buffer != null; + if (buffer != null) { + byte[] blob = this.buffer.get(key); + if (blob != null) return blob; + } return super.get(key); } @@ -247,9 +257,12 @@ public final class Heap extends HeapModifier implements BLOB { synchronized (this) { // check the buffer - byte[] blob = this.buffer.get(key); - if (blob != null) return blob.length; - + assert buffer != null; + if (buffer != null) { + byte[] blob = this.buffer.get(key); + if (blob != null) return blob.length; + } + return super.length(key); } } @@ -260,6 +273,9 @@ public final class Heap extends HeapModifier implements BLOB { */ @Override public synchronized void clear() throws IOException { + Log.logInfo("Heap", "clearing heap " + this.name()); + assert buffer != null; + if (buffer == null) buffer = new TreeMap(ordering); this.buffer.clear(); this.buffersize = 0; super.clear(); @@ -270,6 +286,7 @@ public final class Heap extends HeapModifier implements BLOB { */ @Override public synchronized void close(final boolean writeIDX) { + Log.logInfo("Heap", "closing heap " + this.name()); if (file != null && buffer != null) { try { flushBuffer(); @@ -321,6 +338,8 @@ public final class Heap extends HeapModifier implements BLOB { if (putToGap(key, b)) return; } catch (RowSpaceExceededException e) {} // too less space can be ignored, we have a second try + assert this.buffer != null; + // if there is not enough space in the buffer, flush all if (this.buffersize + b.length > buffermax) { // this is too big. Flush everything @@ -329,15 +348,19 @@ public final class Heap extends HeapModifier implements BLOB { if (b.length > buffermax) { this.add(key, b); } else { - this.buffer.put(key, b); - this.buffersize += b.length; + if (this.buffer != null) { + this.buffer.put(key, b); + this.buffersize += b.length; + } } return; } // add entry to buffer - this.buffer.put(key, b); - this.buffersize += b.length; + if (this.buffer != null) { + this.buffer.put(key, b); + this.buffersize += b.length; + } } } @@ -438,10 +461,13 @@ public final class Heap extends HeapModifier implements BLOB { synchronized (this) { // check the buffer - byte[] blob = this.buffer.remove(key); - if (blob != null) { - this.buffersize -= blob.length; - return; + assert buffer != null; + if (buffer != null) { + byte[] blob = this.buffer.remove(key); + if (blob != null) { + this.buffersize -= blob.length; + return; + } } super.remove(key); diff --git a/source/net/yacy/kelondro/blob/HeapReader.java b/source/net/yacy/kelondro/blob/HeapReader.java index 1d809d20b..371d57519 100644 --- a/source/net/yacy/kelondro/blob/HeapReader.java +++ b/source/net/yacy/kelondro/blob/HeapReader.java @@ -283,12 +283,12 @@ public class HeapReader { lastFree = nextFree; } } - Log.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": merged " + merged + " free records"); + Log.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.toString() + ": merged " + merged + " free records"); } } public String name() { - return this.heapFile.getName(); + return this.heapFile.toString(); } public File location() {