From 8b31f9e2024fdd54094895d92f4a1d1458761233 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 23 Apr 2005 13:00:56 +0000 Subject: [PATCH] enhanced shut-down behaviour & added experimental nio-wrapper for kelondroRA (not active yet) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@44 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/http/httpc.java | 2 +- .../anomic/kelondro/kelondroAbstractRA.java | 34 ++- .../de/anomic/kelondro/kelondroNIOFileRA.java | 253 ++++++++++++++++++ .../de/anomic/kelondro/kelondroRecords.java | 2 + .../de/anomic/plasma/plasmaCrawlProfile.java | 6 + source/de/anomic/plasma/plasmaHTCache.java | 4 + source/de/anomic/plasma/plasmaParser.java | 4 + .../de/anomic/plasma/plasmaSwitchboard.java | 9 +- source/de/anomic/plasma/plasmaWordIndex.java | 4 +- .../plasma/plasmaWordIndexRAMCache.java | 21 +- 10 files changed, 316 insertions(+), 23 deletions(-) create mode 100644 source/de/anomic/kelondro/kelondroNIOFileRA.java diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 3f62cf24e..94427bd69 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -526,7 +526,7 @@ public final class httpc { // this is not an error: it's ok, we waited for that } catch (java.net.SocketTimeoutException e) { // the same here; should be ok. - } + } } // close the streams diff --git a/source/de/anomic/kelondro/kelondroAbstractRA.java b/source/de/anomic/kelondro/kelondroAbstractRA.java index 90a2abae1..c92addda0 100644 --- a/source/de/anomic/kelondro/kelondroAbstractRA.java +++ b/source/de/anomic/kelondro/kelondroAbstractRA.java @@ -124,18 +124,28 @@ abstract class kelondroAbstractRA implements kelondroRA { } public String readLine() throws IOException { - // with these functions, we consider a line as always terminated by CRLF - serverByteBuffer sb = new serverByteBuffer(); - int c; - while (true) { - c = read(); - if (c < 0) { - if (sb.length() == 0) return null; else return sb.toString(); - } - if (c == cr) continue; - if (c == lf) return sb.toString(); - sb.append((byte) c); - } + // with these functions, we consider a line as always terminated by CRLF + byte[] bb = new byte[80]; + int bbsize = 0; + int c; + while (true) { + c = read(); + if (c < 0) { + if (bbsize == 0) return null; else return new String(bb, 0, bbsize); + } + if (c == cr) continue; + if (c == lf) return new String(bb, 0, bbsize); + + // append to bb + if (bbsize == bb.length) { + // extend bb size + byte[] newbb = new byte[bb.length * 2]; + System.arraycopy(bb, 0, newbb, 0, bb.length); + bb = newbb; + newbb = null; + } + bb[bbsize++] = (byte) c; + } } public void writeProperties(Properties props, String comment) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroNIOFileRA.java b/source/de/anomic/kelondro/kelondroNIOFileRA.java new file mode 100644 index 000000000..cd4c5ad0b --- /dev/null +++ b/source/de/anomic/kelondro/kelondroNIOFileRA.java @@ -0,0 +1,253 @@ +// kelondroNIOFileRA.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2002 +// last major change: 21.04.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; +import java.nio.*; +import java.nio.channels.*; + +public class kelondroNIOFileRA extends kelondroAbstractRA implements kelondroRA { + + protected final static long headSize = 1024; + + protected RandomAccessFile RAFile; + protected FileChannel RAChannel; + protected MappedByteBuffer bufferHead, bufferBody, bufferTail; + protected long seekPos; + protected long bodyOffset, tailOffset, tailCurrSize, tailMaxSize; + protected boolean mapBody; + protected boolean wroteHead, wroteBody, wroteTail; + + public kelondroNIOFileRA(String file, boolean mapBody, long tailMaxSize) throws IOException { + this(new File(file), mapBody, tailMaxSize); + } + + public kelondroNIOFileRA(File file, boolean mapBody, long tailMaxSize) throws IOException { + this.name = file.getName(); + this.seekPos = 0; + this.bodyOffset = headSize; + if (bodyOffset >= file.length()) { + bodyOffset = file.length(); + mapBody = false; + } + this.tailOffset = file.length(); + this.tailMaxSize = tailMaxSize; + this.tailCurrSize = 0; + this.mapBody = mapBody; + this.RAFile = new RandomAccessFile(file, "rw"); + this.RAChannel = RAFile.getChannel(); + this.bufferHead = RAChannel.map(FileChannel.MapMode.READ_WRITE, 0, (int) bodyOffset); + if (mapBody) + this.bufferBody = RAChannel.map(FileChannel.MapMode.READ_WRITE, bodyOffset, (int) (tailOffset - bodyOffset)); + else + this.bufferBody = null; + this.bufferTail = null; + this.wroteHead = false; + this.wroteBody = false; + this.wroteTail = false; + System.out.println("initialized " + name + " mapBody = " + ((mapBody) ? "true" : "false") + + ", bodyOffset = " + bodyOffset + ", tailOffset = " + tailOffset); + } + + private boolean growTail(long newPos) throws IOException { + if (tailCurrSize >= tailMaxSize) { + System.out.println("cannot grow " + name); + return false; + } + if (tailCurrSize == 0) { + // first grow + this.tailCurrSize = tailMaxSize / 10; + if (tailCurrSize < 1024) tailCurrSize = 1024; + if (tailCurrSize > tailMaxSize) tailCurrSize = tailMaxSize; + } else { + // next grow + tailCurrSize = tailCurrSize * 2; + if (tailCurrSize > tailMaxSize) tailCurrSize = tailMaxSize; + bufferTail.force(); + } + System.out.println("growing " + name + " nextSize=" + tailCurrSize); + bufferTail = RAChannel.map(FileChannel.MapMode.READ_WRITE, tailOffset, (int) tailCurrSize); + wroteTail = false; + return true; + } + + // pseudo-native method read + public int read() throws IOException { + int r; + if (seekPos < bodyOffset) { + r = 0xFF & ((int) bufferHead.get((int) seekPos)); + } else if (seekPos < tailOffset) { + if (mapBody) { + r = 0xFF & ((int) bufferBody.get((int) (seekPos - bodyOffset))); + } else { + RAFile.seek(seekPos); + r = RAFile.read(); + } + } else if (seekPos < (tailOffset + tailCurrSize)) { + r = 0xFF & ((int) bufferTail.get((int) (seekPos - tailOffset))); + } else { + r = -1; + while (growTail(seekPos)) { + if (seekPos < (tailOffset + tailCurrSize)) { + r = 0xFF & ((int) bufferTail.get((int) (seekPos - tailOffset))); + break; + } else { + RAFile.seek(seekPos); + r = RAFile.read(); + break; + } + } + } + seekPos++; + return r; + } + + // pseudo-native method write + public void write(int b) throws IOException { + if (seekPos < bodyOffset) { + bufferHead.put((int) seekPos, (byte) (b & 0xff)); + wroteHead = true; + } else if (seekPos < tailOffset) { + if (mapBody) { + bufferBody.put((int) (seekPos - bodyOffset), (byte) (b & 0xff)); + wroteBody = true; + } else { + RAFile.seek(seekPos); + RAFile.write(b); + } + } else if (seekPos < (tailOffset + tailCurrSize)) { + bufferTail.put((int) (seekPos - tailOffset), (byte) (b & 0xff)); + wroteTail = true; + } else { + while (growTail(seekPos)) { + if (seekPos < (tailOffset + tailCurrSize)) { + bufferTail.put((int) (seekPos - tailOffset), (byte) (b & 0xff)); + wroteTail = true; + break; + } else { + RAFile.seek(seekPos); + RAFile.write(b); + break; + } + } + } + seekPos++; + } + + public int read(byte[] b, int off, int len) throws IOException { + for (int i = 0; i < len; i++) { + b[off + i] = (byte) read(); + } + return len; + } + + public void write(byte[] b, int off, int len) throws IOException { + for (int i = 0; i < len; i++) { + write(b[off + i]); + } + } + + public void seek(long pos) throws IOException { + seekPos = pos; + } + + public void close() throws IOException { + if (wroteHead) { + bufferHead.force(); + System.out.println("wrote " + name + " head"); + } + if ((wroteBody) && (mapBody)) { + bufferBody.force(); + System.out.println("wrote " + name + " body"); + } + if (wroteTail) { + bufferTail.force(); + System.out.println("wrote " + name + " tail"); + } + RAChannel.close(); + RAFile.close(); + } + + + public static void test1(kelondroRA ra) throws IOException { + for (int i = 0; i < 2048; i++) { + ra.seek(i); + ra.write(32); + } + } + + public static void main(String[] args) { + // tests... + File f = new File("/yacy/nio.test.txt"); + if (f.exists()) f.delete(); + + System.out.println("* fill with blanks"); + try { kelondroRA ra = new kelondroNIOFileRA(f, true, 2046); test1(ra); ra.close(); + } catch (IOException e) { e.printStackTrace(); } + + System.out.println("* write in at head"); + try { kelondroRA ra = new kelondroNIOFileRA(f, true, 10); + ra.seek(8); ra.write((byte) 'h'); + ra.close(); + } catch (IOException e) { e.printStackTrace(); } + + System.out.println("* write in at body"); + try { kelondroRA ra = new kelondroNIOFileRA(f, true, 10); + ra.seek(1024); ra.write((byte) 'b'); + ra.close(); + } catch (IOException e) { e.printStackTrace(); } + + System.out.println("* write in at tail"); + try { kelondroRA ra = new kelondroNIOFileRA(f, true, 10); + ra.seek(2048); ra.write((byte) 't'); + ra.close(); + } catch (IOException e) { e.printStackTrace(); } + + System.out.println("* write in behind tail"); + try { kelondroRA ra = new kelondroNIOFileRA(f, true, 10); + ra.seek(2059); ra.write((byte) 'x'); + ra.close(); + } catch (IOException e) { e.printStackTrace(); } + } + +} diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 8e50bc7d5..aff2261a4 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -142,6 +142,7 @@ public class kelondroRecords { throw new IOException("kelondroRecords: tree file " + file + " already exist"); this.filename = file.getCanonicalPath(); kelondroRA raf = new kelondroFileRA(this.filename); + //kelondroRA raf = new kelondroNIOFileRA(this.filename, false, 10000); init(raf, ohbytec, ohhandlec, columns, FHandles, txtProps, txtPropWidth); this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); if (cachesize <= 0) { @@ -239,6 +240,7 @@ public class kelondroRecords { this.filename = file.getCanonicalPath(); kelondroRA raf = new kelondroFileRA(this.filename); + //kelondroRA raf = new kelondroNIOFileRA(this.filename, (file.length() < 4000000), 10000); init(raf); this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); if (cachesize <= 0) { diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java index 85262aeec..ef980f6ca 100644 --- a/source/de/anomic/plasma/plasmaCrawlProfile.java +++ b/source/de/anomic/plasma/plasmaCrawlProfile.java @@ -59,6 +59,12 @@ public class plasmaCrawlProfile { } } + public void close() { + try { + profileTable.close(); + } catch (IOException e) {} + } + public int size() { return profileTable.size(); } diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 4406f1bb7..57c5ec71b 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -130,6 +130,10 @@ public final class plasmaHTCache { serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000); } + public void close() throws IOException { + responseHeaderDB.close(); + } + private String ageString(long date, File f) { String s = Integer.toHexString(f.hashCode()); while (s.length() < 8) s = "0" + s; diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 26aa47776..915feeb2c 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -60,6 +60,10 @@ public class plasmaParser { } + public void close() { + // frees resources; does nothing yet + } + public document parseSource(URL location, String mimeType, byte[] source) { // make a scraper and transformer htmlFilterContentScraper scraper = new htmlFilterContentScraper(location); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index badd9b13a..957ff8b0b 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -358,17 +358,22 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi terminateAllThreads(true); log.logSystem("SWITCHBOARD SHUTDOWN STEP 2: sending termination signal to threaded indexing (stand by..)"); int waitingBoundSeconds = Integer.parseInt(getConfig("shutdownWaiting", "120")); - wordIndex.terminate(waitingBoundSeconds); + wordIndex.close(waitingBoundSeconds); log.logSystem("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager"); try { wikiDB.close(); messageDB.close(); facilityDB.close(); loadedURL.close(); + noticeURL.close(); + errorURL.close(); + profiles.close(); + parser.close(); + cacheManager.close(); } catch (IOException e) {} log.logSystem("SWITCHBOARD SHUTDOWN TERMINATED"); } - + public int totalSize() { return processStack.size() + cacheLoader.size() + noticeURL.stackSize(); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index d5ee6fce6..2cb9c431f 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -77,8 +77,8 @@ public class plasmaWordIndex { return ramCache.removeEntriesMem(wordHash, urlHashes, deleteComplete); } - public void terminate(int waitingBoundSeconds) { - ramCache.terminate(waitingBoundSeconds); + public void close(int waitingBoundSeconds) { + ramCache.close(waitingBoundSeconds); } public synchronized void deleteComplete(String wordHash) throws IOException { diff --git a/source/de/anomic/plasma/plasmaWordIndexRAMCache.java b/source/de/anomic/plasma/plasmaWordIndexRAMCache.java index fe9e198f9..26890a893 100644 --- a/source/de/anomic/plasma/plasmaWordIndexRAMCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexRAMCache.java @@ -60,6 +60,7 @@ public class plasmaWordIndexRAMCache extends Thread { kelondroMScoreCluster hashScore; plasmaWordIndexFileCache pic; boolean terminate; + long terminateUntil; int maxWords; static { @@ -76,7 +77,7 @@ public class plasmaWordIndexRAMCache extends Thread { this.maxWords = maxWords; this.terminate = false; } - + public void run() { serverLog.logSystem("PLASMA INDEXING", "started word cache management"); int check; @@ -101,32 +102,40 @@ public class plasmaWordIndexRAMCache extends Thread { // close all; try { // first flush everything - while (hashScore.size() > 0) flushSpecific(false); + while ((hashScore.size() > 0) && (System.currentTimeMillis() < terminateUntil)) { + flushSpecific(false); + } // then close file cache: pic.close(); } catch (IOException e) { serverLog.logDebug("PLASMA INDEXING", "interrupted final flush: " + e.toString()); } + // report + if (hashScore.size() == 0) + serverLog.logSystem("PLASMA INDEXING", "finished final flush; flushed all words"); + else + serverLog.logError("PLASMA INDEXING", "terminated final flush; " + hashScore.size() + " words lost"); + // delete data cache = null; hashScore = null; - serverLog.logSystem("PLASMA INDEXING", "finished final flush"); + } - public void terminate(int waitingBoundSeconds) { + public void close(int waitingBoundSeconds) { terminate = true; // wait until terination is done // we can do at least 6 flushes/second int waitingtime = 10 + (((cache == null) ? 0 : cache.size()) / 5); // seconds if (waitingtime > waitingBoundSeconds) waitingtime = waitingBoundSeconds; // upper bound + this.terminateUntil = System.currentTimeMillis() + (waitingtime * 1000); + terminate = true; while ((cache != null) && (waitingtime > 0)) { serverLog.logDebug("PLASMA INDEXING", "final word flush; cache.size=" + cache.size() + "; time-out in " + waitingtime + " seconds"); try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {} waitingtime -= 5; } - if (cache != null) serverLog.logError("PLASMA INDEXING", "Cache was not flushed completely; " + hashScore.size() + " words lost"); - } private synchronized int flushSpecific(boolean greatest) throws IOException {