added kelondroArray, the basis for upcoming kelondroHash and some bug fixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@311 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent afe7cbe4de
commit 68dc2b0c6b

@ -399,4 +399,21 @@
</java>
</target>
<!-- run a single file (selected in NetBeans4) -->
<target name="run-single" depends="compile" description="Run Single File">
    <!-- the class to launch is expected in the 'classname' property; refuse to run without it -->
    <fail unless="classname">Must set property 'classname'</fail>
    <!-- launch the requested class (previously the property was checked but 'yacy' was always started) -->
    <java classname="${classname}" fork="yes">
        <classpath refid="run.classpath"/>
        <classpath>
            <pathelement location="${build}"/>
            <pathelement location="${htroot}"/>
            <pathelement location="${lib}/commons-collections.jar" />
            <pathelement location="${lib}/commons-pool-1.2.jar" />
            <pathelement location="${libx}" />
            <fileset dir="${libx}" includes="**/*.jar" />
        </classpath>
        <arg line=""/>
    </java>
</target>
</project>

@ -0,0 +1,159 @@
// kelondroArray.java
// ------------------
// part of the Kelondro Database
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 20.06.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
/*
This class extends kelondroRecords and adds an array structure
*/
package de.anomic.kelondro;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Iterator;
import java.util.StringTokenizer;
/**
 * An index-addressed table on top of kelondroRecords: every row is stored in
 * the record whose handle index equals the row index.
 */
public class kelondroArray extends kelondroRecords {

    // define the Over-Head-Array
    private static final short thisOHBytes   = 0; // our record definition does not need extra bytes
    private static final short thisOHHandles = 0; // no overhead handles either: rows are addressed by their index

    /**
     * Creates a new array file.
     *
     * @param file     the file to create
     * @param columns  one entry per column (the command line tool passes the value length here)
     * @param intprops number of handle properties; each of them is initialised with Handle(0)
     * @throws IOException if the underlying record file reports an I/O problem
     */
    public kelondroArray(File file, int[] columns, int intprops) throws IOException {
        super(file, 0, thisOHBytes, thisOHHandles, columns, intprops, columns.length /*txtProps*/, 80 /*txtPropWidth*/);
        for (int i = 0; i < intprops; i++) setHandle(i, new Handle(0));
    }

    /**
     * Opens a file with an existing array.
     *
     * @param file the existing array file
     * @throws IOException if the underlying record file reports an I/O problem
     */
    public kelondroArray(File file) throws IOException {
        super(file, 0);
    }

    /**
     * Writes a row at the given position. If the position lies beyond the
     * current size, empty records are appended until it becomes addressable.
     *
     * @param index position of the row, must not be negative
     * @param row   one byte[] per column; its length must equal columns()
     * @return the value handed back by Node.setValues(row) (the previous row content)
     * @throws IllegalArgumentException if the row has the wrong number of columns
     * @throws kelondroException        if index is negative
     * @throws IOException              if the underlying record file reports an I/O problem
     */
    public synchronized byte[][] set(int index, byte[][] row) throws IOException {
        if (row.length != columns()) throw new IllegalArgumentException("set: wrong row length " + row.length + "; must be " + columns());
        // a negative index can never be reached by growing the file, so reject it up front
        if (index < 0) throw new kelondroException(filename, "out of bounds, index=" + index + ", size=" + size());

        // make room for element: newNode() appends one empty record per call
        if (size() <= index) while (newNode() <= index) {}

        // get the node at position index and write the row
        return getNode(new Handle(index)).setValues(row);
    }

    /**
     * Reads the row at the given position.
     *
     * @param index position of the row, 0 &lt;= index &lt; size()
     * @return the column values of that row
     * @throws kelondroException if index is outside of the array
     * @throws IOException       if the underlying record file reports an I/O problem
     */
    public synchronized byte[][] get(int index) throws IOException {
        if ((index < 0) || (index >= size())) throw new kelondroException(filename, "out of bounds, index=" + index + ", size=" + size());
        return getNode(new Handle(index)).getValues();
    }

    /**
     * Dumps all rows to System.out, one line per row.
     *
     * @throws IOException if a row cannot be read
     */
    public void print() throws IOException {
        System.out.println("PRINTOUT of table, length=" + size());
        byte[][] row;
        for (int i = 0; i < size(); i++) {
            System.out.print("row " + i + ": ");
            row = get(i);
            for (int j = 0; j < columns(); j++) System.out.print(new String(row[j]) + ", ");
            System.out.println();
        }
        System.out.println("EndOfTable");
    }

    /**
     * Command line front end: create (-c), view (-v), get (-g) and set (-s).
     * Any exception is printed to stderr; the array file is closed in every case.
     */
    private static void cmd(String[] args) {
        /*
        java -classpath classes de.anomic.kelondro.kelondroArray -c testarray.array 40
        java -classpath classes de.anomic.kelondro.kelondroArray -v testarray.array
        */
        System.out.print("kelondroArray ");
        for (int i = 0; i < args.length; i++) System.out.print(args[i] + " ");
        System.out.println("");
        try {
            if ((args.length == 3) && (args[0].equals("-c"))) {
                // create <filename> <valuelen>
                // parse first: a malformed number must not cost us the existing file
                int[] lens = new int[1];
                lens[0] = Integer.parseInt(args[2]);
                File f = new File(args[1]);
                if (f.exists() && !f.delete()) throw new IOException("cannot delete existing file " + f);
                kelondroArray fm = new kelondroArray(f, lens, 2);
                fm.close();
            } else if ((args.length == 2) && (args[0].equals("-v"))) {
                // view <filename>
                kelondroArray fm = new kelondroArray(new File(args[1]));
                try {
                    fm.print();
                    fm.print(true);
                } finally {
                    fm.close();
                }
            } else if ((args.length == 3) && (args[0].equals("-g"))) {
                // get <filename> <index>
                kelondroArray fm = new kelondroArray(new File(args[1]));
                try {
                    byte[][] row = fm.get(Integer.parseInt(args[2]));
                    for (int j = 0; j < fm.columns(); j++) System.out.print(new String(row[j]) + " ");
                    System.out.println();
                } finally {
                    fm.close();
                }
            } else if ((args.length == 4) && (args[0].equals("-s"))) {
                // set <filename> <index> <value>
                kelondroArray fm = new kelondroArray(new File(args[1]));
                try {
                    byte[][] row = new byte[][]{args[3].getBytes()};
                    fm.set(Integer.parseInt(args[2]), row);
                } finally {
                    fm.close();
                }
            } else {
                // the argument order printed here mirrors what the branches above actually parse
                System.err.println("usage: kelondroArray -c <db-file> <valuelen>");
                System.err.println("       kelondroArray -v <db-file>");
                System.err.println("       kelondroArray -g <db-file> <index>");
                System.err.println("       kelondroArray -s <db-file> <index> <value>");
                System.err.println("( create, view, get, set )");
                System.exit(0);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        cmd(args);
    }
}

@ -78,7 +78,8 @@ public class kelondroRecords {
// constants
private static final int NUL = Integer.MIN_VALUE; // the meta value for the kelondroRecords' NUL abstraction
public static final long memBlock = 5000000; // do not fill cache further if the amount of available memory is less that this
public static final long memBlock = 5000000; // do not fill cache further if the amount of available memory is less that this
public static final long memKcolb = 10000000; // if the amount of available memory is greater than this, do not use cache size to block, simply use memory
// static seek pointers
private static long POS_MAGIC = 0; // 1 byte, byte: file type magic
@ -240,7 +241,7 @@ public class kelondroRecords {
public kelondroRecords(File file, long buffersize) throws IOException{
// opens an existing tree
if (!file.exists()) throw new IOException("kelondroRecords: tree file " + file + " does not exist");
if (!file.exists()) throw new IOException("kelondroRecords: file " + file.getAbsoluteFile().toString() + " does not exist");
this.filename = file.getCanonicalPath();
kelondroRA raf = new kelondroFileRA(this.filename);
@ -317,10 +318,19 @@ public class kelondroRecords {
}
/**
 * Appends one empty record to the file.
 *
 * @return USEDC + FREEC as it stands after the record has been created
 *         (presumably the total number of record slots -- confirm against size())
 */
protected int newNode() {
    // the no-argument Node constructor saves the empty record itself; the object is not needed afterwards
    new Node();
    return USEDC + FREEC;
}
/**
 * Wraps the given column values into a new Node object.
 *
 * @param v the column values for the node
 * @return the freshly constructed node
 */
protected Node newNode(byte[][] v) {
    final Node created = new Node(v);
    return created;
}
/**
 * Convenience variant of getNode(Handle, Node, int) for callers that have no
 * parent node: passes null as parent and 0 as reference position.
 *
 * @param handle the handle of the record to fetch
 * @return the node for that handle
 */
protected Node getNode(Handle handle) {
    final Node noParent = null;
    final int noReference = 0;
    return getNode(handle, noParent, noReference);
}
protected Node getNode(Handle handle, Node parentNode, int referenceInParent) {
if (XcacheSize == 0) return new Node(handle, parentNode, referenceInParent);
synchronized (XcacheHeaders) {
@ -328,6 +338,7 @@ public class kelondroRecords {
if (n == null) {
n = new Node(handle, parentNode, referenceInParent);
checkCacheSpace();
n.updateNodeCache();
return n;
} else {
//System.out.println("read from cache " + n.toString());
@ -354,8 +365,11 @@ public class kelondroRecords {
// should be only called within a synchronized(XcacheHeaders) environment
if (XcacheSize == 0) return;
Handle delkey;
while ((XcacheHeaders.size() >= XcacheSize) ||
((XcacheHeaders.size() > 0) && (Runtime.getRuntime().freeMemory() < memBlock))) {
long free = Runtime.getRuntime().freeMemory();
int count = 0;
while ((count++ < 100) && (free < memKcolb) &&
((XcacheHeaders.size() >= XcacheSize) ||
((XcacheHeaders.size() > 0) && (free < memBlock)))) {
// delete one entry
try {
delkey = (Handle) XcacheScore.getMinObject(); // error (see below) here
@ -401,7 +415,22 @@ public class kelondroRecords {
private byte[][] values = null; // an array of byte[] nodes is the value vector
private Handle handle = new Handle(NUL); // index of the entry, by default NUL means undefined
private Node(byte[][] v) {
private Node() {
    // create a new empty node and reserve empty space in file for it
    // use this method only if you want to extend the file with new entries
    // without the need to have content in it.

    // a fresh array holds null in every column; save() writes zero bytes for null columns
    this.values = new byte[COLWIDTHS.length][];
    this.ohBytes = null;
    this.ohHandle = null;
    try {
        save();
    } catch (IOException e) {
        // keep the underlying reason in the message: the failure is not necessarily a full disk
        throw new kelondroException(filename, "kelondro file out of space: " + e.getMessage());
    }
}
private Node(byte[][] v) {
// this defines an entry, but it does not lead to writing these entry values to the file
// storing this entry can be done using the 'save()' command
if (v == null) throw new IllegalArgumentException("Node values = NULL");
@ -412,7 +441,20 @@ public class kelondroRecords {
this.ohBytes = null;
this.ohHandle = null;
}
private Node(Handle handle, Node parentNode, int referenceInParent) {
private Node(Handle handle) {
// this creates an entry with an pre-reserved entry position
// values can be written using the setValues() method
// but we expect that values are already there in the file ready to be read which we do not here
if (handle == null) throw new IllegalArgumentException("INTERNAL ERROR: node handle is null.");
if (handle.index > USEDC + FREEC) throw new kelondroException(filename, "INTERNAL ERROR: node handle index exceeds size.");
// set values and read node
this.values = null;
this.handle.index = handle.index;
this.ohBytes = null;
this.ohHandle = null;
}
private Node(Handle handle, Node parentNode, int referenceInParent) {
// this creates an entry with an pre-reserved entry position
// values can be written using the setValues() method
// but we expect that values are already there in the file ready to be read which we do not here
@ -439,7 +481,6 @@ public class kelondroRecords {
this.handle.index = handle.index;
this.ohBytes = null;
this.ohHandle = null;
updateNode();
}
public void finalize() {
@ -466,7 +507,7 @@ public class kelondroRecords {
entryFile.writeByte(b[j]);
}
}
updateNode();
updateNodeCache();
}
protected void setOHHandle(Handle[] i) throws IOException {
if (i == null) throw new IllegalArgumentException("setOHint: setting null value does not make any sense");
@ -483,7 +524,7 @@ public class kelondroRecords {
entryFile.writeInt(i[j].index);
}
}
updateNode();
updateNodeCache();
}
protected byte[] getOHByte() throws IOException {
if (ohBytes == null) {
@ -495,7 +536,6 @@ public class kelondroRecords {
ohBytes[j] = entryFile.readByte();
}
}
updateNode();
}
return ohBytes;
}
@ -511,7 +551,6 @@ public class kelondroRecords {
ohHandle[j] = (i == NUL) ? null : new Handle(i);
}
}
updateNode();
}
return ohHandle;
}
@ -541,9 +580,9 @@ public class kelondroRecords {
seek = seek + COLWIDTHS[i];
}
}
updateNodeCache();
}
//System.out.print("setValues result: "); for (int i = 0; i < values.length; i++) System.out.print(new String(result[i]) + " "); System.out.println(".");
updateNode();
return result; // return previous value
}
@ -560,7 +599,6 @@ public class kelondroRecords {
entryFile.read(values[0], 0, values[0].length);
}
for (int i = 1; i < COLWIDTHS.length; i++) values[i] = null;
updateNode();
return values[0];
}
} else {
@ -584,7 +622,6 @@ public class kelondroRecords {
seek = seek + COLWIDTHS[i];
}
}
updateNode();
return values;
}
} else if ((values.length > 1) && (values[1] == null)) {
@ -598,7 +635,6 @@ public class kelondroRecords {
seek = seek + COLWIDTHS[i];
}
}
updateNode();
return values;
} else {
return values;
@ -617,7 +653,8 @@ public class kelondroRecords {
// or by recycling used records
this.handle = new Handle();
// place the data to the file
if ((values == null) || ((values != null) && (values.length > 1) && (values[1] == null))) {
//if ((values == null) || ((values != null) && (values.length > 1) && (values[1] == null))) {
if (values == null) {
// there is nothing to save
throw new kelondroException(filename, "no values to save");
}
@ -627,7 +664,7 @@ public class kelondroRecords {
if (ohHandle == null) {for (int i = 0; i < OHHANDLEC; i++) entryFile.writeInt(0);}
else {for (int i = 0; i < OHHANDLEC; i++) entryFile.writeInt(ohHandle[i].index);}
long seek = seekpos(this.handle) + overhead;
for (int i = 0; i < values.length; i++) {
for (int i = 0; i < values.length; i++) {
entryFile.seek(seek);
if (values[i] == null) {
for (int j = 0; j < COLWIDTHS[i]; j++) entryFile.writeByte(0);
@ -639,7 +676,6 @@ public class kelondroRecords {
}
seek = seek + COLWIDTHS[i];
}
updateNode();
}
public String toString() {
if (this.handle.index == NUL) return "NULL";
@ -657,7 +693,7 @@ public class kelondroRecords {
}
return s;
}
private void updateNode() {
private void updateNodeCache() {
if (this.handle == null) return;
if (this.values == null) return;
if (this.ohBytes == null) return;
@ -892,7 +928,7 @@ public class kelondroRecords {
}
}
}
private Handle(int index) {
protected Handle(int index) {
this.index = index;
}
public String toString() {

@ -148,7 +148,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load slots
public static int crawlSlots = 12;
public static int crawlSlots = 10;
// couloured list management
public static TreeSet blueList = null;

@ -141,6 +141,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long updateTime;
plasmaWordIndexEntry wordEntry;
byte[][] row = new byte[5][];
System.gc(); // this can speed up the assortment, because they may better use the cache
while (i.hasNext()) {
// get entries
entry = (Map.Entry) i.next();
@ -163,11 +164,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
}
wordcount++;
i.remove(); // free some mem
// write a log
if (System.currentTimeMillis() > messageTime) {
System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("dumping status: " + wordcount + " words done, " + ((cache.size() - wordcount) / wordsPerSecond) + " seconds remaining");
log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / wordsPerSecond) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
@ -194,6 +197,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
plasmaWordIndexEntry wordEntry;
byte[][] row;
Runtime rt = Runtime.getRuntime();
System.gc(); // this is not for performance, but only to make the statistic work better
while (i.hasNext()) {
// get out one entry
node = (kelondroRecords.Node) i.next();
@ -205,16 +209,12 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
addEntry(wordHash, wordEntry, creationTime);
urlCount++;
// protect against memory shortage
while (rt.freeMemory() < 1000000) {
//System.out.print("FLUSH+GC bevore=" + rt.freeMemory());
flushFromMem();
System.gc();
//System.out.println(", after=" + rt.freeMemory());
}
while (rt.freeMemory() < 1000000) {flushFromMem(); System.gc();}
// write a log
if (System.currentTimeMillis() > messageTime) {
System.gc(); // for better statistic
urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining");
log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
@ -552,6 +552,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
flushThread.pause();
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
while (cache.size() >= this.maxWords) flushFromMem();
if ((cache.size() > 10000) && (Runtime.getRuntime().freeMemory() < 11000000)) flushFromMem();
while ((cache.size() > 0) && (Runtime.getRuntime().freeMemory() < 1000000)) {
flushFromMem();
System.gc();

@ -508,12 +508,17 @@ public final class yacy {
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
try {
serverLog log = new serverLog("WORDMIGRATION");
log.logInfo("STARTING MIGRATION");
plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, 20000, log);
enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
while (words.hasMoreElements()) {
while (words.hasMoreElements()) try {
checkMigrate(dbroot, log, (File) words.nextElement(), wordIndex);
} catch (Exception e) {
e.printStackTrace();
}
log.logInfo("FINISHED MIGRATION JOB, WAIT FOR DUMP");
wordIndex.close(60);
log.logInfo("TERMINATED MIGRATION");
} catch (IOException e) {
e.printStackTrace();
}

Loading…
Cancel
Save