From 6ad471ef960efd44da8400c78008ed76107e0194 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 16 Aug 2006 19:49:31 +0000 Subject: [PATCH] * applied many compiler warning recommendations * cleaned up code * added unit test code * migrated ranking RCI computation to kelondroFlex and kelondroCollectionIndex git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2414 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Blog.java | 6 +- htroot/Network.java | 2 +- htroot/SettingsAck_p.java | 2 +- htroot/Settings_p.java | 2 +- htroot/yacy/message.java | 2 +- source/dbtest.java | 46 ++- source/de/anomic/data/blogBoard.java | 10 +- source/de/anomic/data/gettext.java | 4 +- .../htmlFilter/htmlFilterContentScraper.java | 4 +- .../anomic/http/httpChunkedInputStream.java | 1 - source/de/anomic/http/httpTemplate.java | 1 - source/de/anomic/index/indexRAMCacheRI.java | 10 +- .../kelondro/kelondroAbstractOrder.java | 4 +- .../de/anomic/kelondro/kelondroAttrSeq.java | 100 ++++-- .../anomic/kelondro/kelondroBase64Order.java | 3 +- .../kelondro/kelondroBufferedIOChunks.java | 20 +- .../kelondro/kelondroCollectionIndex.java | 25 +- source/de/anomic/kelondro/kelondroDyn.java | 4 +- .../de/anomic/kelondro/kelondroDynTree.java | 5 +- .../kelondro/kelondroFixedWidthArray.java | 52 ++- .../de/anomic/kelondro/kelondroFlexTable.java | 16 +- .../kelondro/kelondroFlexWidthArray.java | 64 +++- .../de/anomic/kelondro/kelondroHashtable.java | 12 +- .../de/anomic/kelondro/kelondroMHashMap.java | 11 +- .../kelondro/kelondroMScoreCluster.java | 4 +- .../de/anomic/kelondro/kelondroMSetTools.java | 20 +- .../de/anomic/kelondro/kelondroRecords.java | 296 ++++++++---------- source/de/anomic/kelondro/kelondroRow.java | 108 +++++-- source/de/anomic/kelondro/kelondroRowSet.java | 2 +- source/de/anomic/kelondro/kelondroStack.java | 10 +- source/de/anomic/kelondro/kelondroTree.java | 121 ++++--- source/de/anomic/plasma/plasmaCondenser.java | 4 +- source/de/anomic/plasma/plasmaCrawlNURL.java | 2 +- .../anomic/plasma/plasmaRankingCRProcess.java | 243 ++++++++++++-- .../plasma/plasmaRankingRCIEvaluation.java | 4 +- .../anomic/plasma/plasmaSwitchboardQueue.java | 2 +- source/de/anomic/plasma/plasmaWordIndex.java | 2 +- .../plasma/plasmaWordIndexFileCluster.java | 4 +- source/de/anomic/tools/loaderThreads.java | 8 +- source/de/anomic/yacy/yacyDHTAction.java | 4 +- source/de/anomic/yacy/yacyNewsDB.java | 6 +- source/de/anomic/yacy/yacySeedDB.java | 6 +- 42 files changed, 811 insertions(+), 441 deletions(-) diff --git a/htroot/Blog.java b/htroot/Blog.java index 2a70d010e..6bfc46ea2 100644 --- a/htroot/Blog.java +++ b/htroot/Blog.java @@ -153,10 +153,8 @@ public class Blog { } catch (UnsupportedEncodingException e) { subject = StrSubject.getBytes(); } - - try { - switchboard.blogDB.write(switchboard.blogDB.newEntry(pagename, subject, author, ip, date, content)); - } catch (IOException e) {} + + switchboard.blogDB.write(switchboard.blogDB.newEntry(pagename, subject, author, ip, date, content)); // create a news message HashMap map = new HashMap(); diff --git a/htroot/Network.java b/htroot/Network.java index a7e1dfe1f..df4cb8396 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -339,7 +339,7 @@ public class Network { } if (((startURL = (String) isCrawling.get(seed.hash)) != null) && (PPM >= 10)) { prop.put(STR_TABLE_LIST + conCount + "_isCrawling", 1); - prop.put(STR_TABLE_LIST + conCount + "_isCrawling_page", startURL);; + prop.put(STR_TABLE_LIST + conCount + "_isCrawling_page", startURL); } prop.put(STR_TABLE_LIST + conCount + "_hash", seed.hash); String shortname = seed.get(yacySeed.NAME, "deadlink"); diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java index 4365912a7..87216ced2 100644 --- a/htroot/SettingsAck_p.java +++ b/htroot/SettingsAck_p.java @@ -562,7 +562,7 @@ public class SettingsAck_p { // were changed, we now try to upload the seed list with the new settings if (env.getConfig("seedUploadMethod","none").equalsIgnoreCase(uploaderName)) { String error; - if ((error = ((plasmaSwitchboard)env).yc.saveSeedList(env)) == null) {; + if ((error = ((plasmaSwitchboard)env).yc.saveSeedList(env)) == null) { // we have successfully uploaded the seed file prop.put("info", 13); diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index ba94e66ad..a2f1c3fda 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -127,7 +127,7 @@ public final class Settings_p { for (int i=0; i < forwardingMethods.length; i++) { try { - Class forwarder = Class.forName(env.getConfig("portForwarding." + forwardingMethods[i],"")); // FIXME: why is forwarder declared, but never used? + //Class forwarder = Class.forName(env.getConfig("portForwarding." + forwardingMethods[i],"")); // FIXME: why is forwarder declared, but never used? prop.put("forwardingMethods_" + methodCount + "_name",forwardingMethods[i]); prop.put("forwardingMethods_" + methodCount + "_selected", forwardingMethods[i].equals(currentForwarder)?1:0); methodCount++; diff --git a/htroot/yacy/message.java b/htroot/yacy/message.java index b941dfecb..9ab477796 100644 --- a/htroot/yacy/message.java +++ b/htroot/yacy/message.java @@ -156,7 +156,7 @@ public final class message { } catch (IOException e) { serverLog.logSevere("MESSAGE", "NEW MESSAGE ARRIVED! (error: " + e.getMessage() + ")"); - }; + } } // System.out.println("respond = " + prop.toString()); diff --git a/source/dbtest.java b/source/dbtest.java index 7f9162f90..d40fed422 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -290,6 +290,50 @@ public class dbtest { } } + if (command.equals("ramtest")) { + // fill database with random entries and delete them again; + // this is repeated without termination; after each loop + // the current ram is printed out + // args: + long count = Long.parseLong(args[3]); + long randomstart = Long.parseLong(args[4]); + byte[] key; + Random random; + long start, write, remove; + int loop = 0; + while (true) { + // write + random = new Random(randomstart); + start = System.currentTimeMillis(); + for (int i = 0; i < count; i++) { + key = randomHash(random); + table.put(table.row().newEntry(new byte[][]{key, key, dummyvalue2})); + } + write = System.currentTimeMillis() - start; + + // delete + random = new Random(randomstart); + start = System.currentTimeMillis(); + for (int i = 0; i < count; i++) { + key = randomHash(random); + table.remove(key); + } + remove = System.currentTimeMillis() - start; + + System.out.println("Loop " + loop + ": Write = " + write + ", Remove = " + remove); + System.out.println(" bevore GC: " + + "free = " + Runtime.getRuntime().freeMemory() + + ", max = " + Runtime.getRuntime().maxMemory() + + ", total = " + Runtime.getRuntime().totalMemory()); + System.gc(); + System.out.println(" after GC: " + + "free = " + Runtime.getRuntime().freeMemory() + + ", max = " + Runtime.getRuntime().maxMemory() + + ", total = " + Runtime.getRuntime().totalMemory()); + loop++; + } + } + if (command.equals("list")) { Iterator i = null; if (table instanceof kelondroSplittedTree) i = ((kelondroSplittedTree) table).rows(true, false, null); @@ -529,7 +573,7 @@ final class dbTable implements kelondroIndex { return null; } - public Iterator keys(boolean up, boolean rotating, byte[] startKey) throws IOException { + public Iterator keys(boolean up, boolean rotating, byte[] startKey) { // Objects are of type String return null; } diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index 8c56bf07e..963a4478f 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -135,7 +135,7 @@ public class blogBoard { return wikiBoard.guessAuthor(ip); } - public entry newEntry(String key, byte[] subject, byte[] author, String ip, Date date, byte[] page) throws IOException { + public entry newEntry(String key, byte[] subject, byte[] author, String ip, Date date, byte[] page) { return new entry(normalize(key), subject, author, ip, date, page); } @@ -144,7 +144,7 @@ public class blogBoard { String key; Map record; - public entry(String nkey, byte[] subject, byte[] author, String ip, Date date, byte[] page) throws IOException { + public entry(String nkey, byte[] subject, byte[] author, String ip, Date date, byte[] page) { record = new HashMap(); key = nkey; if (key.length() > keyLength) key = key.substring(0, keyLength); @@ -323,10 +323,8 @@ public class blogBoard { } catch (UnsupportedEncodingException e1) { page = StrPage.getBytes(); } - - try { - write (newEntry(key, subject, author, ip, date, page)); - } catch (IOException e) { } + + write (newEntry(key, subject, author, ip, date, page)); } return true; } diff --git a/source/de/anomic/data/gettext.java b/source/de/anomic/data/gettext.java index 606d69227..e6925a02b 100644 --- a/source/de/anomic/data/gettext.java +++ b/source/de/anomic/data/gettext.java @@ -37,9 +37,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import de.anomic.htmlFilter.htmlFilterAbstractTransformer; import de.anomic.htmlFilter.htmlFilterContentTransformer; -import de.anomic.server.serverAbstractSwitch; import de.anomic.server.logging.serverLog; public class gettext{ @@ -164,7 +162,7 @@ public class gettext{ ArrayList strings = transformer.getStrings(content.toString().getBytes()); return getGettextSource(inputfile, oldgettextmap, strings); } - public static ArrayList getGettextSource(File inputfile, Map oldgettextmap, ArrayList strings) throws FileNotFoundException{ + public static ArrayList getGettextSource(File inputfile, Map oldgettextmap, ArrayList strings) { if(oldgettextmap==null) oldgettextmap=new HashMap(); diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index db116699e..281213da3 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -197,10 +197,10 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen } } if (tagname.equalsIgnoreCase("area")) { - String title = cleanLine(tagopts.getProperty("title","")); + String areatitle = cleanLine(tagopts.getProperty("title","")); //String alt = tagopts.getProperty("alt",""); String href = tagopts.getProperty("href", ""); - if (href.length() > 0) anchors.put(absolutePath(href), title); + if (href.length() > 0) anchors.put(absolutePath(href), areatitle); } } diff --git a/source/de/anomic/http/httpChunkedInputStream.java b/source/de/anomic/http/httpChunkedInputStream.java index 484a34667..d41511dbe 100644 --- a/source/de/anomic/http/httpChunkedInputStream.java +++ b/source/de/anomic/http/httpChunkedInputStream.java @@ -244,7 +244,6 @@ public final class httpChunkedInputStream extends InputStream { static void exhaustInputStream(InputStream inStream) throws IOException { byte buffer[] = new byte[1024]; while (inStream.read(buffer) >= 0) { - ; } } } diff --git a/source/de/anomic/http/httpTemplate.java b/source/de/anomic/http/httpTemplate.java index 25921ac8b..26ef5ddac 100644 --- a/source/de/anomic/http/httpTemplate.java +++ b/source/de/anomic/http/httpTemplate.java @@ -56,7 +56,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PushbackInputStream; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import java.util.Hashtable; diff --git a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index c6383306a..063ffca0a 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -51,12 +51,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { // class variables private final File databaseRoot; - private final TreeMap wCache; // wordhash-container + protected final TreeMap wCache; // wordhash-container private final TreeMap kCache; // time-container; for karenz/DHT caching (set with high priority) private final kelondroMScoreCluster hashScore; private final kelondroMScoreCluster hashDate; private long kCacheInc = 0; - private long startTime; + private long initTime; private int wCacheMaxCount; public int wCacheReferenceLimit; private final serverLog log; @@ -78,7 +78,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { this.hashScore = new kelondroMScoreCluster(); this.hashDate = new kelondroMScoreCluster(); this.kCacheInc = 0; - this.startTime = System.currentTimeMillis(); + this.initTime = System.currentTimeMillis(); this.wCacheMaxCount = 10000; this.wCacheReferenceLimit = wCacheReferenceLimitInit; this.log = log; @@ -379,11 +379,11 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { } private int intTime(long longTime) { - return (int) Math.max(0, ((longTime - startTime) / 1000)); + return (int) Math.max(0, ((longTime - initTime) / 1000)); } private long longEmit(int intTime) { - return (((long) intTime) * (long) 1000) + startTime; + return (((long) intTime) * (long) 1000) + initTime; } public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) { diff --git a/source/de/anomic/kelondro/kelondroAbstractOrder.java b/source/de/anomic/kelondro/kelondroAbstractOrder.java index 783316cc7..059068ecf 100644 --- a/source/de/anomic/kelondro/kelondroAbstractOrder.java +++ b/source/de/anomic/kelondro/kelondroAbstractOrder.java @@ -80,7 +80,7 @@ public abstract class kelondroAbstractOrder implements kelondroOrder { return zero; } - public void rotate(byte[] zero) { - this.zero = zero; + public void rotate(byte[] newzero) { + this.zero = newzero; } } diff --git a/source/de/anomic/kelondro/kelondroAttrSeq.java b/source/de/anomic/kelondro/kelondroAttrSeq.java index 8f8aba241..180619be0 100644 --- a/source/de/anomic/kelondro/kelondroAttrSeq.java +++ b/source/de/anomic/kelondro/kelondroAttrSeq.java @@ -50,9 +50,12 @@ import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.FileInputStream; import java.io.IOException; +import java.util.HashSet; import java.util.Map; import java.util.HashMap; +import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.Iterator; import java.util.ArrayList; import java.util.StringTokenizer; @@ -67,7 +70,7 @@ public class kelondroAttrSeq { // class objects private File file; private Map entries; - private Structure structure; + protected Structure structure; private String name; private long created; @@ -117,8 +120,7 @@ public class kelondroAttrSeq { } else { br = new BufferedReader(new InputStreamReader(new FileInputStream(loadfile))); } - String line; - String key; + String line, key, oldvalue, newvalue; while ((line = br.readLine()) != null) { line = line.trim(); if (line.length() == 0) continue; @@ -136,10 +138,32 @@ public class kelondroAttrSeq { } if ((p = line.indexOf('=')) > 0) { key = line.substring(0, p).trim(); - if (entries.containsKey(key)) { - logWarning("read PropFile " + loadfile.toString() + ", key " + key + ": double occurrence"); + newvalue = line.substring(p + 1).trim(); + oldvalue = (String) entries.get(key); + if (oldvalue != null) { + if (newvalue.equals(oldvalue)) { + //logWarning("key " + key + ": double occurrence. values are equal. second appearance is ignored"); + } else { + if (newvalue.length() < oldvalue.length()) { + if (oldvalue.substring(0, newvalue.length()).equals(newvalue)) { + logWarning("key " + key + ": double occurrence. new value is subset of old value. second appearance is ignored"); + } else { + logWarning("key " + key + ": double occurrence. new value is shorter than old value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue); + } + } else if (newvalue.length() > oldvalue.length()) { + if (newvalue.substring(0, oldvalue.length()).equals(oldvalue)) { + logWarning("key " + key + ": double occurrence. old value is subset of new value. first appearance is ignored"); + } else { + logWarning("key " + key + ": double occurrence. old value is shorter than new value, but not a subsequence. old = " + oldvalue + ", new = " + newvalue); + } + entries.put(key, newvalue); + } else { + logWarning("key " + key + ": double occurrence. old and new value have equal length but are not equal. old = " + oldvalue + ", new = " + newvalue); + //entries.put(key, newvalue); + } + } } else { - entries.put(key, line.substring(p + 1).trim()); + entries.put(key, newvalue); } } } @@ -189,10 +213,10 @@ public class kelondroAttrSeq { } public Entry newEntry(String pivot, boolean tree) { - return new Entry(pivot, new HashMap(), (tree) ? (Map) new TreeMap() : (Map) new HashMap()); + return new Entry(pivot, new HashMap(), (tree) ? (Set) new TreeSet() : (Set) new HashSet()); } - public Entry newEntry(String pivot, HashMap props, Map seq) { + public Entry newEntry(String pivot, HashMap props, Set seq) { return new Entry(pivot, props, seq); } @@ -237,7 +261,7 @@ public class kelondroAttrSeq { protected int[] prop_len = null, prop_pos = null; protected String[] seq_names = null; protected int[] seq_len = null, seq_pos = null; - + protected kelondroRow seqrow; // example: //# Structure=,'=',,,,,,,,,,,'|',* @@ -297,8 +321,23 @@ public class kelondroAttrSeq { seq_pos[i] = p; p += seq_len[i]; } + + // generate rowdef for seq row definition + StringBuffer rowdef = new StringBuffer(); + rowdef.append("byte[] "); + rowdef.append(seq_names[0]); + rowdef.append('-'); + rowdef.append(seq_len[0]); + + for (int i = 1; i < seq_names.length; i++) { + rowdef.append(", byte[] "); + rowdef.append(seq_names[i]); + rowdef.append('-'); + rowdef.append(seq_len[i]); + } + seqrow = new kelondroRow(new String(rowdef)); } - + public String toString() { StringBuffer sb = new StringBuffer(100); sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',"); @@ -320,9 +359,9 @@ public class kelondroAttrSeq { public class Entry { String pivot; HashMap attrs; - Map seq; + Set seq; - public Entry(String pivot, HashMap attrs, Map seq) { + public Entry(String pivot, HashMap attrs, Set seq) { this.pivot = pivot; this.attrs = attrs; this.seq = seq; @@ -331,22 +370,22 @@ public class kelondroAttrSeq { public Entry(String pivot, String attrseq, boolean tree) { this.pivot = pivot; attrs = new HashMap(); - seq = (tree) ? (Map) new TreeMap() : (Map) new HashMap(); + seq = (tree) ? (Set) new TreeSet() : (Set) new HashSet(); for (int i = 0; i < structure.prop_names.length; i++) { attrs.put(structure.prop_names[i], new Long(kelondroBase64Order.enhancedCoder.decodeLong(attrseq.substring(structure.prop_pos[i], structure.prop_pos[i] + structure.prop_len[i])))); } int p = attrseq.indexOf('|') + 1; - long[] seqattrs = new long[structure.seq_names.length - 1]; + //long[] seqattrs = new long[structure.seq_names.length - 1]; String seqname; while (p + structure.seq_len[0] <= attrseq.length()) { seqname = attrseq.substring(p, p + structure.seq_len[0]); p += structure.seq_len[0]; for (int i = 1; i < structure.seq_names.length; i++) { - seqattrs[i - 1] = kelondroBase64Order.enhancedCoder.decodeLong(attrseq.substring(p, p + structure.seq_len[i])); + //seqattrs[i - 1] = kelondroBase64Order.enhancedCoder.decodeLong(attrseq.substring(p, p + structure.seq_len[i])); p += structure.seq_len[i]; } - seq.put(seqname, seqattrs); + seq.add(seqname/*, seqattrs*/); } } @@ -364,16 +403,25 @@ public class kelondroAttrSeq { attrs.put(key, new Long(attr)); } - public Map getSeq() { + public Set getSeqSet() { return seq; } - public void setSeq(Map seq) { + public kelondroRowCollection getSeqCollection() { + kelondroRowCollection collection = new kelondroRowCollection(structure.seqrow, seq.size()); + Iterator i = seq.iterator(); + while (i.hasNext()) { + collection.add(structure.seqrow.newEntry(((String) i.next()).getBytes())); + } + return collection; + } + + public void setSeq(Set seq) { this.seq = seq; } - public void addSeq(String s, long[] seqattrs) { - this.seq.put(s, seqattrs); + public void addSeq(String s/*, long[] seqattrs*/) { + this.seq.add(s/*, seqattrs*/); } public String toString() { @@ -385,16 +433,16 @@ public class kelondroAttrSeq { sb.append(kelondroBase64Order.enhancedCoder.encodeLongSmart((val == null) ? 0 : val.longValue(), structure.prop_len[i])); } sb.append('|'); - Iterator q = seq.entrySet().iterator(); - Map.Entry entry; - long[] seqattrs; + Iterator q = seq.iterator(); + //long[] seqattrs; while (q.hasNext()) { - entry = (Map.Entry) q.next(); - sb.append((String) entry.getKey()); - seqattrs = (long[]) entry.getValue(); + sb.append((String) q.next()); + //seqattrs = (long[]) entry.getValue(); + /* for (int i = 1; i < structure.seq_names.length; i++) { sb.append(kelondroBase64Order.enhancedCoder.encodeLong(seqattrs[i - 1], structure.seq_len[i])); } + */ } return sb.toString(); } diff --git a/source/de/anomic/kelondro/kelondroBase64Order.java b/source/de/anomic/kelondro/kelondroBase64Order.java index 2ace58c4f..5af455596 100644 --- a/source/de/anomic/kelondro/kelondroBase64Order.java +++ b/source/de/anomic/kelondro/kelondroBase64Order.java @@ -118,9 +118,8 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond s.setLength(length); while (length > 0) s.setCharAt(--length, alpha[63]); return s.toString(); - } else { - return encodeLong(c, length); } + return encodeLong(c, length); } public final String encodeLong(long c, int length) { diff --git a/source/de/anomic/kelondro/kelondroBufferedIOChunks.java b/source/de/anomic/kelondro/kelondroBufferedIOChunks.java index f1119e273..45570089a 100644 --- a/source/de/anomic/kelondro/kelondroBufferedIOChunks.java +++ b/source/de/anomic/kelondro/kelondroBufferedIOChunks.java @@ -93,18 +93,16 @@ public final class kelondroBufferedIOChunks extends kelondroAbstractIOChunks imp this.ra.seek(pos + off); return ra.read(b, off, len); } - } else { - // use buffered entry - if (bb.length >= off + len) { - // the bufferd entry is long enough - System.arraycopy(bb, off, b, off, len); - return len; - } else { - // the entry is not long enough. transmit only a part - System.arraycopy(bb, off, b, off, bb.length - off); - return bb.length - off; - } } + // use buffered entry + if (bb.length >= off + len) { + // the bufferd entry is long enough + System.arraycopy(bb, off, b, off, len); + return len; + } + // the entry is not long enough. transmit only a part + System.arraycopy(bb, off, b, off, bb.length - off); + return bb.length - off; } } diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 1f6e4ab07..16c63b32c 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -36,12 +36,12 @@ import de.anomic.server.serverFileUtils; public class kelondroCollectionIndex { - private kelondroIndex index; + protected kelondroIndex index; private File path; private String filenameStub; private int loadfactor; private Map arrays; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects - private kelondroRow rowdef; // definition of the payload (chunks inside the collections) + private kelondroRow playloadrow; // definition of the payload (chunks inside the collections) // private int partitions; // this is the maxmimum number of array files; yet not used private static final int idx_col_key = 0; // the index @@ -87,7 +87,7 @@ public class kelondroCollectionIndex { // the buffersize is number of bytes that are only used if the kelondroFlexTable is backed up with a kelondroTree this.path = path; this.filenameStub = filenameStub; - this.rowdef = rowdef; + this.playloadrow = rowdef; this.loadfactor = loadfactor; // create index table @@ -113,11 +113,11 @@ public class kelondroCollectionIndex { } private kelondroFixedWidthArray openArrayFile(int partitionNumber, int serialNumber, boolean create) throws IOException { - File f = arrayFile(path, filenameStub, loadfactor, rowdef.objectsize(), partitionNumber, serialNumber); + File f = arrayFile(path, filenameStub, loadfactor, playloadrow.objectsize(), partitionNumber, serialNumber); int load = arrayCapacity(partitionNumber); kelondroRow rowdef = new kelondroRow( "byte[] key-" + index.row().width(0) + "," + - "byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.rowdef.objectsize()) + "byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.playloadrow.objectsize()) ); if (f.exists()) { return new kelondroFixedWidthArray(f, rowdef); @@ -195,7 +195,7 @@ public class kelondroCollectionIndex { overwrite(key, collection); } return 0; - } else { + } // overwrite the old collection // read old information int oldchunksize = (int) oldindexrow.getColLong(idx_col_chunksize); // needed only for migration @@ -245,7 +245,7 @@ public class kelondroCollectionIndex { // we don't need a new slot, just write into the old one // find array file - kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.rowdef.objectsize()); + kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.playloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -271,7 +271,6 @@ public class kelondroCollectionIndex { overwrite(key, collection); } return removed; - } } } @@ -280,7 +279,7 @@ public class kelondroCollectionIndex { // simply store a collection without check if the collection existed before // find array file - kelondroFixedWidthArray array = getArray(arrayIndex(collection.size()), 0, this.rowdef.objectsize()); + kelondroFixedWidthArray array = getArray(arrayIndex(collection.size()), 0, this.playloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -293,7 +292,7 @@ public class kelondroCollectionIndex { // store the new row number in the index kelondroRow.Entry indexEntry = index.row().newEntry(); indexEntry.setCol(idx_col_key, key); - indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize()); + indexEntry.setCol(idx_col_chunksize, this.playloadrow.objectsize()); indexEntry.setCol(idx_col_chunkcount, collection.size()); indexEntry.setCol(idx_col_indexpos, (long) newRowNumber); indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); @@ -327,7 +326,7 @@ public class kelondroCollectionIndex { } } - private kelondroRowSet getdelete(kelondroRow.Entry indexrow, boolean remove, boolean deleteIfEmpty) throws IOException { + protected kelondroRowSet getdelete(kelondroRow.Entry indexrow, boolean remove, boolean deleteIfEmpty) throws IOException { // call this only within a synchronized(index) environment // read values @@ -343,14 +342,14 @@ public class kelondroCollectionIndex { if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array does not contain expected row"); // read the row and define a collection - kelondroRowSet collection = new kelondroRowSet(this.rowdef, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize() + kelondroRowSet collection = new kelondroRowSet(this.playloadrow, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize() if (index.order().compare(arrayrow.getColBytes(0), indexrow.getColBytes(idx_col_key)) != 0) { // check if we got the right row; this row is wrong. Fix it: index.remove(indexrow.getColBytes(idx_col_key)); // the wrong row cannot be fixed // store the row number in the index; this may be a double-entry, but better than nothing kelondroRow.Entry indexEntry = index.row().newEntry(); indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0)); - indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize()); + indexEntry.setCol(idx_col_chunksize, this.playloadrow.objectsize()); indexEntry.setCol(idx_col_chunkcount, collection.size()); indexEntry.setCol(idx_col_indexpos, (long) rownumber); indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index 38c45927f..df7ff9c84 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -62,7 +62,7 @@ public class kelondroDyn extends kelondroTree { private static final int counterlen = 8; - private int keylen; + protected int keylen; private int reclen; private int segmentCount; private char fillChar; @@ -123,7 +123,7 @@ public class kelondroDyn extends kelondroTree { return key.getBytes(); } - private String origKey(byte[] rawKey) { + protected String origKey(byte[] rawKey) { int n = keylen - 1; if (n >= rawKey.length) n = rawKey.length - 1; while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--; diff --git a/source/de/anomic/kelondro/kelondroDynTree.java b/source/de/anomic/kelondro/kelondroDynTree.java index 0125b57d2..c64c4d278 100644 --- a/source/de/anomic/kelondro/kelondroDynTree.java +++ b/source/de/anomic/kelondro/kelondroDynTree.java @@ -143,9 +143,8 @@ public class kelondroDynTree { kelondroRA ra = table.getRA(key); treeRAHandles.put(key, ra); return new kelondroTree(ra, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent); - } else { - return null; } + return null; } protected void closeTree(String key) throws IOException { @@ -206,7 +205,7 @@ public class kelondroDynTree { protected class treeBuffer { private String tablename; - private Hashtable tbuffer; + protected Hashtable tbuffer; public long timestamp; treeBuffer(String tablename) { diff --git a/source/de/anomic/kelondro/kelondroFixedWidthArray.java b/source/de/anomic/kelondro/kelondroFixedWidthArray.java index 737db4fb1..a0bf160e7 100644 --- a/source/de/anomic/kelondro/kelondroFixedWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFixedWidthArray.java @@ -65,6 +65,10 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro if (exitOnFail) System.exit(-1); throw new RuntimeException("cannot set handle " + i + " / " + e.getMessage()); } + // store column description + for (int i = 0; i < rowdef.columns(); i++) { + try {super.setText(i, rowdef.column(i).toString().getBytes());} catch (IOException e) {} + } } public kelondroFixedWidthArray(File file, kelondroRow rowdef) throws IOException{ @@ -86,7 +90,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro n = getNode(new Handle(index)); // write the row - byte[] before = n.setValueRow(rowentry.bytes()); + byte[] before = n.setValueRow((rowentry == null) ? null : rowentry.bytes()); n.commit(CP_NONE); return row().newEntry(before); @@ -122,7 +126,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro public void print() throws IOException { System.out.println("PRINTOUT of table, length=" + size()); kelondroRow.Entry row; - for (int i = 0; i < size(); i++) { + for (int i = 0; i < super.USAGE.allCount(); i++) { System.out.print("row " + i + ": "); row = get(i); for (int j = 0; j < row.columns(); j++) System.out.print(((row.empty(j)) ? "NULL" : row.getColString(j, "UTF-8")) + ", "); @@ -132,11 +136,13 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro } public static void main(String[] args) { - File f = new File("d:\\\\mc\\privat\\fixtest.db"); - f.delete(); + //File f = new File("d:\\\\mc\\privat\\fixtest.db"); + File f = new File("/Users/admin/fixtest.db"); kelondroRow rowdef = new kelondroRow("byte[] a-12, byte[] b-4"); - kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, rowdef, 6, true); try { + System.out.println("erster Test"); + f.delete(); + kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, rowdef, 6, true); k.set(3, k.row().newEntry(new byte[][]{ "test123".getBytes(), "abcd".getBytes()})); k.add(k.row().newEntry(new byte[][]{ @@ -147,6 +153,42 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro System.out.println(k.get(2).toString()); System.out.println(k.get(3).toString()); System.out.println(k.get(4).toString()); + k.close(); + + System.out.println("zweiter Test"); + f.delete(); + k = new kelondroFixedWidthArray(f, rowdef, 6, true); + k.add(k.row().newEntry(new byte[][]{"a".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"b".getBytes(), "xxxx".getBytes()})); + k.remove(0); + + k.add(k.row().newEntry(new byte[][]{"c".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"d".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"e".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"f".getBytes(), "xxxx".getBytes()})); + k.remove(0); + k.remove(1); + + k.print(); + k.print(true); + k.close(); + + + System.out.println("dritter Test"); + f.delete(); + k = new kelondroFixedWidthArray(f, rowdef, 6, true); + for (int i = 1; i <= 200; i = i * 2) { + for (int j = 0; j < i*2; j++) { + k.add(k.row().newEntry(new byte[][]{(Integer.toString(i) + "-" + Integer.toString(j)).getBytes(), "xxxx".getBytes()})); + } + for (int j = 0; j < i; j++) { + k.remove(j); + } + } + k.print(); + k.print(true); + k.close(); + } catch (IOException e) { e.printStackTrace(); } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 92fdd03b9..78748ec1a 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -31,7 +31,7 @@ import java.util.Iterator; public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { - private kelondroBytesIntMap index; + protected kelondroBytesIntMap index; public kelondroFlexTable(File path, String tablename, kelondroOrder objectOrder, long buffersize, long preloadTime, kelondroRow rowdef, boolean exitOnFail) throws IOException { super(path, tablename, rowdef, exitOnFail); @@ -103,9 +103,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return ri; } - - private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException { - kelondroTree index = new kelondroTree(indexfile, buffersize, preloadTime, 10, + private kelondroTree initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException { + kelondroTree treeindex = new kelondroTree(indexfile, buffersize, preloadTime, 10, new kelondroRow("byte[] key-" + rowdef.width(0) + ", int reference-4 {b256}"), objectOrder, 2, 80, true); Iterator content = super.col[0].contentNodes(-1); @@ -115,16 +114,16 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr while (content.hasNext()) { node = (kelondroRecords.Node) content.next(); i = node.handle().hashCode(); - indexentry = index.row().newEntry(); + indexentry = treeindex.row().newEntry(); indexentry.setCol(0, node.getValueRow()); indexentry.setCol(1, i); - index.put(indexentry); + treeindex.put(indexentry); if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); } } - return index; + return treeindex; } public synchronized kelondroRow.Entry get(byte[] key) throws IOException { @@ -144,9 +143,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr if (i < 0) { index.puti(row.getColBytes(0), super.add(row)); return null; - } else { - return super.set(i, row); } + return super.set(i, row); } } diff --git a/source/de/anomic/kelondro/kelondroFlexWidthArray.java b/source/de/anomic/kelondro/kelondroFlexWidthArray.java index 0ccf3fe15..e2eb1990f 100644 --- a/source/de/anomic/kelondro/kelondroFlexWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFlexWidthArray.java @@ -163,9 +163,8 @@ public class kelondroFlexWidthArray implements kelondroArray { e = col[c].row().newEntry( rowentry.bytes(), rowdef.colstart[c], - rowdef.colstart[lastcol] - rowdef.colstart[c] - + rowdef.width(lastcol)); - col[c].set(index, e); + rowdef.colstart[lastcol] + rowdef.width(lastcol) - rowdef.colstart[c]); + col[c].set(index,e); c = c + col[c].row().columns(); } } @@ -190,8 +189,14 @@ public class kelondroFlexWidthArray implements kelondroArray { public void remove(int index) throws IOException { int r = 0; synchronized (col) { + + // remove only from the first column + col[0].remove(index); + r = r + col[r].row().columns(); + + // the other columns will be blanked out only while (r < rowdef.columns()) { - col[r].remove(index); + col[r].set(index, null); r = r + col[r].row().columns(); } } @@ -200,7 +205,7 @@ public class kelondroFlexWidthArray implements kelondroArray { public void print() throws IOException { System.out.println("PRINTOUT of table, length=" + size()); kelondroRow.Entry row; - for (int i = 0; i < size(); i++) { + for (int i = 0; i < col[0].USAGE.allCount(); i++) { System.out.print("row " + i + ": "); row = get(i); System.out.println(row.toString()); @@ -209,26 +214,51 @@ public class kelondroFlexWidthArray implements kelondroArray { } System.out.println("EndOfTable"); } - public static void main(String[] args) { - File f = new File("d:\\\\mc\\privat\\"); + //File f = new File("d:\\\\mc\\privat\\fixtest.db"); + File f = new File("/Users/admin/"); + kelondroRow rowdef = new kelondroRow("byte[] a-12, byte[] b-4"); + String testname = "flextest"; try { - kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true); + System.out.println("erster Test"); + new File(f, testname).delete(); + + kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", rowdef, true); + k.add(k.row().newEntry(new byte[][]{"a".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"b".getBytes(), "xxxx".getBytes()})); + k.remove(0); + + k.add(k.row().newEntry(new byte[][]{"c".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"d".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"e".getBytes(), "xxxx".getBytes()})); + k.add(k.row().newEntry(new byte[][]{"f".getBytes(), "xxxx".getBytes()})); + k.remove(0); + k.remove(1); - k.set(3, k.row().newEntry(new byte[][]{ - "test123".getBytes(), "abcd".getBytes()})); - k.add(k.row().newEntry(new byte[][]{ - "test456".getBytes(), "efgh".getBytes()})); + k.print(); + k.col[0].print(true); + k.col[1].print(true); + k.close(); + + + System.out.println("zweiter Test"); + new File(f, testname).delete(); + k = new kelondroFlexWidthArray(f, "flextest", rowdef, true); + for (int i = 1; i <= 20; i = i * 2) { + for (int j = 0; j < i*2; j++) { + k.add(k.row().newEntry(new byte[][]{(Integer.toString(i) + "-" + Integer.toString(j)).getBytes(), "xxxx".getBytes()})); + } + for (int j = 0; j < i; j++) { + k.remove(j); + } + } + k.print(); + k.col[0].print(true); k.close(); - k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true); - System.out.println(k.get(2).toString()); - System.out.println(k.get(3).toString()); - System.out.println(k.get(4).toString()); } catch (IOException e) { e.printStackTrace(); } } - } diff --git a/source/de/anomic/kelondro/kelondroHashtable.java b/source/de/anomic/kelondro/kelondroHashtable.java index b5fe8e096..65b02bbbe 100644 --- a/source/de/anomic/kelondro/kelondroHashtable.java +++ b/source/de/anomic/kelondro/kelondroHashtable.java @@ -134,13 +134,13 @@ import java.io.IOException; public class kelondroHashtable { - private kelondroFixedWidthArray hashArray; - private int offset; - private int maxk; - private int maxrehash; - private kelondroRow.Entry dummyRow; + private kelondroFixedWidthArray hashArray; + protected int offset; + protected int maxk; + private int maxrehash; + private kelondroRow.Entry dummyRow; - private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder.encodeLong(0, 5).getBytes(); + private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder.encodeLong(0, 5).getBytes(); public kelondroHashtable(File file, kelondroRow rowdef, int offset, int maxsize, int maxrehash, boolean exitOnFail) { // this creates a new hashtable diff --git a/source/de/anomic/kelondro/kelondroMHashMap.java b/source/de/anomic/kelondro/kelondroMHashMap.java index 0d3f0f198..402d96083 100644 --- a/source/de/anomic/kelondro/kelondroMHashMap.java +++ b/source/de/anomic/kelondro/kelondroMHashMap.java @@ -210,12 +210,11 @@ public class kelondroMHashMap { //System.out.println("get " + new String(key) + " from cell " + hash); if (hash < 0) { return null; - } else { - // read old entry - byte[] value = new byte[valuelen]; - System.arraycopy(mem, hash * reclen + keylen, value, 0, valuelen); - return value; } + // read old entry + byte[] value = new byte[valuelen]; + System.arraycopy(mem, hash * reclen + keylen, value, 0, valuelen); + return value; } public void remove(int key) { @@ -276,7 +275,7 @@ public class kelondroMHashMap { return; } - private int anyhashpos(int start) { + protected int anyhashpos(int start) { while (start < capacity()) { if (mem[start * reclen] != 0) return start; start++; diff --git a/source/de/anomic/kelondro/kelondroMScoreCluster.java b/source/de/anomic/kelondro/kelondroMScoreCluster.java index d32c92dfc..b12adc1c3 100644 --- a/source/de/anomic/kelondro/kelondroMScoreCluster.java +++ b/source/de/anomic/kelondro/kelondroMScoreCluster.java @@ -50,8 +50,8 @@ import java.util.Random; // only for testing public final class kelondroMScoreCluster { - private final TreeMap refkeyDB; // a mapping from a reference to the cluster key - private final TreeMap keyrefDB; // a mapping from the cluster key to the reference + protected final TreeMap refkeyDB; // a mapping from a reference to the cluster key + protected final TreeMap keyrefDB; // a mapping from the cluster key to the reference private long gcount; private int encnt; diff --git a/source/de/anomic/kelondro/kelondroMSetTools.java b/source/de/anomic/kelondro/kelondroMSetTools.java index 21409de98..f8bd9270c 100644 --- a/source/de/anomic/kelondro/kelondroMSetTools.java +++ b/source/de/anomic/kelondro/kelondroMSetTools.java @@ -95,13 +95,10 @@ public class kelondroMSetTools { // start most efficient method if (stepsEnum > stepsTest) { - if (map.size() < set.size()) - return joinConstructiveByTestSetInMap(map, set); - else - return joinConstructiveByTestMapInSet(map, set); - } else { + if (map.size() < set.size()) return joinConstructiveByTestSetInMap(map, set); + return joinConstructiveByTestMapInSet(map, set); + } return joinConstructiveByEnumeration(map, set); - } } private static TreeMap joinConstructiveByTestSetInMap(TreeMap map, TreeSet set) { @@ -167,13 +164,10 @@ public class kelondroMSetTools { // start most efficient method if (stepsEnum > stepsTest) { - if (set1.size() < set2.size()) - return joinConstructiveByTest(set1, set2); - else - return joinConstructiveByTest(set2, set1); - } else { - return joinConstructiveByEnumeration(set1, set2); - } + if (set1.size() < set2.size()) return joinConstructiveByTest(set1, set2); + return joinConstructiveByTest(set2, set1); + } + return joinConstructiveByEnumeration(set1, set2); } private static TreeSet joinConstructiveByTest(TreeSet small, TreeSet large) { diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 6a7f57f4f..2f962dfc6 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -72,8 +72,10 @@ import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Random; +import java.util.Set; import java.util.StringTokenizer; import java.util.Iterator; +import java.util.TreeSet; import java.util.logging.Logger; public class kelondroRecords { @@ -93,24 +95,24 @@ public class kelondroRecords { public static final int CP_HIGH = 2; // cache priority high; entry must be cached // static seek pointers - public static int LEN_DESCR = 60; - private static long POS_MAGIC = 0; // 1 byte, byte: file type magic - private static long POS_BUSY = POS_MAGIC + 1; // 1 byte, byte: marker for synchronization - private static long POS_PORT = POS_BUSY + 1; // 2 bytes, short: hint for remote db access - private static long POS_DESCR = POS_PORT + 2; // 60 bytes, string: any description string - private static long POS_COLUMNS = POS_DESCR + LEN_DESCR; // 2 bytes, short: number of columns in one entry - private static long POS_OHBYTEC = POS_COLUMNS + 2; // 2 bytes, number of extra bytes on each Node - private static long POS_OHHANDLEC = POS_OHBYTEC + 2; // 2 bytes, number of Handles on each Node - private static long POS_USEDC = POS_OHHANDLEC + 2; // 4 bytes, int: used counter - private static long POS_FREEC = POS_USEDC + 4; // 4 bytes, int: free counter - private static long POS_FREEH = POS_FREEC + 4; // 4 bytes, int: free pointer (to free chain start) - private static long POS_MD5PW = POS_FREEH + 4; // 16 bytes, string (encrypted password to this file) - private static long POS_ENCRYPTION = POS_MD5PW + 16; // 16 bytes, string (method description) - private static long POS_OFFSET = POS_ENCRYPTION + 16; // 8 bytes, long (seek position of first record) - private static long POS_INTPROPC = POS_OFFSET + 8; // 4 bytes, int: number of INTPROP elements - private static long POS_TXTPROPC = POS_INTPROPC + 4; // 4 bytes, int: number of TXTPROP elements - private static long POS_TXTPROPW = POS_TXTPROPC + 4; // 4 bytes, int: width of TXTPROP elements - private static long POS_COLWIDTHS = POS_TXTPROPW + 4; // array of 4 bytes, int[]: sizes of columns + public static int LEN_DESCR = 60; + protected static long POS_MAGIC = 0; // 1 byte, byte: file type magic + protected static long POS_BUSY = POS_MAGIC + 1; // 1 byte, byte: marker for synchronization + protected static long POS_PORT = POS_BUSY + 1; // 2 bytes, short: hint for remote db access + protected static long POS_DESCR = POS_PORT + 2; // 60 bytes, string: any description string + protected static long POS_COLUMNS = POS_DESCR + LEN_DESCR; // 2 bytes, short: number of columns in one entry + protected static long POS_OHBYTEC = POS_COLUMNS + 2; // 2 bytes, number of extra bytes on each Node + protected static long POS_OHHANDLEC = POS_OHBYTEC + 2; // 2 bytes, number of Handles on each Node + protected static long POS_USEDC = POS_OHHANDLEC + 2; // 4 bytes, int: used counter + protected static long POS_FREEC = POS_USEDC + 4; // 4 bytes, int: free counter + protected static long POS_FREEH = POS_FREEC + 4; // 4 bytes, int: free pointer (to free chain start) + protected static long POS_MD5PW = POS_FREEH + 4; // 16 bytes, string (encrypted password to this file) + protected static long POS_ENCRYPTION = POS_MD5PW + 16; // 16 bytes, string (method description) + protected static long POS_OFFSET = POS_ENCRYPTION + 16; // 8 bytes, long (seek position of first record) + protected static long POS_INTPROPC = POS_OFFSET + 8; // 4 bytes, int: number of INTPROP elements + protected static long POS_TXTPROPC = POS_INTPROPC + 4; // 4 bytes, int: number of TXTPROP elements + protected static long POS_TXTPROPW = POS_TXTPROPC + 4; // 4 bytes, int: width of TXTPROP elements + protected static long POS_COLWIDTHS = POS_TXTPROPW + 4; // array of 4 bytes, int[]: sizes of columns // after this configuration field comes: // POS_HANDLES: INTPROPC * 4 bytes : INTPROPC Integer properties, randomly accessible // POS_TXTPROPS: TXTPROPC * TXTPROPW : an array of TXTPROPC byte arrays of width TXTPROPW that can hold any string @@ -119,29 +121,29 @@ public class kelondroRecords { // values that are only present at run-time protected String filename; // the database's file name protected kelondroIOChunks entryFile; // the database file - private int overhead; // OHBYTEC + 4 * OHHANDLEC = size of additional control bytes - private int headchunksize;// overheadsize + key element column size - private int tailchunksize;// sum(all: COLWIDTHS) minus the size of the key element colum - private int recordsize; // (overhead + sum(all: COLWIDTHS)) = the overall size of a record + protected int overhead; // OHBYTEC + 4 * OHHANDLEC = size of additional control bytes + protected int headchunksize;// overheadsize + key element column size + protected int tailchunksize;// sum(all: COLWIDTHS) minus the size of the key element colum + protected int recordsize; // (overhead + sum(all: COLWIDTHS)) = the overall size of a record // dynamic run-time seek pointers - private long POS_HANDLES = 0; // starts after end of POS_COLWIDHS which is POS_COLWIDTHS + COLWIDTHS.length * 4 - private long POS_TXTPROPS = 0; // starts after end of POS_HANDLES which is POS_HANDLES + HANDLES.length * 4 - private long POS_NODES = 0; // starts after end of POS_TXTPROPS which is POS_TXTPROPS + TXTPROPS.length * TXTPROPW + protected long POS_HANDLES = 0; // starts after end of POS_COLWIDHS which is POS_COLWIDTHS + COLWIDTHS.length * 4 + protected long POS_TXTPROPS = 0; // starts after end of POS_HANDLES which is POS_HANDLES + HANDLES.length * 4 + protected long POS_NODES = 0; // starts after end of POS_TXTPROPS which is POS_TXTPROPS + TXTPROPS.length * TXTPROPW // dynamic variables that are back-ups of stored values in file; read/defined on instantiation - private usageControl USAGE; // counter for used and re-use records and pointer to free-list - private short OHBYTEC; // number of extra bytes in each node - private short OHHANDLEC; // number of handles in each node - private kelondroRow ROW; // array with widths of columns - private Handle HANDLES[]; // array with handles - private byte[] TXTPROPS[]; // array with text properties - private int TXTPROPW; // size of a single TXTPROPS element + protected usageControl USAGE; // counter for used and re-use records and pointer to free-list + protected short OHBYTEC; // number of extra bytes in each node + protected short OHHANDLEC; // number of handles in each node + protected kelondroRow ROW; // array with widths of columns + protected Handle HANDLES[]; // array with handles + protected byte[] TXTPROPS[]; // array with text properties + protected int TXTPROPW; // size of a single TXTPROPS element // caching buffer - private kelondroIntBytesMap cacheHeaders; // the cache; holds overhead values and key element - private int cacheSize; // number of cache records - private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush; + protected kelondroIntBytesMap cacheHeaders; // the cache; holds overhead values and key element + protected int cacheSize; // number of cache records + protected int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush; // optional logger protected Logger theLogger = null; @@ -149,10 +151,10 @@ public class kelondroRecords { // Random. This is used to shift flush-times of write-buffers to differrent time private static Random random = new Random(System.currentTimeMillis()); - private class usageControl { - private int USEDC; // counter of used elements - private int FREEC; // counter of free elements in list of free Nodes - private Handle FREEH; // pointer to first element in list of free Nodes, empty = NUL + protected final class usageControl { + protected int USEDC; // counter of used elements + protected int FREEC; // counter of free elements in list of free Nodes + protected Handle FREEH; // pointer to first element in list of free Nodes, empty = NUL public usageControl() throws IOException { read(); @@ -464,7 +466,7 @@ public class kelondroRecords { return this.headchunksize + element_in_cache; } - public int[] cacheNodeStatus() { + public final int[] cacheNodeStatus() { if (cacheHeaders == null) return new int[]{0,0,0,0,0,0,0,0,0,0}; return new int[]{ cacheSize, @@ -480,7 +482,7 @@ public class kelondroRecords { }; } - public String cacheNodeStatusString() { + public final String cacheNodeStatusString() { return "cacheMaxSize=" + cacheSize + ", cacheCurrSize=" + ((cacheHeaders == null) ? 0 : cacheHeaders.size()) + @@ -492,20 +494,20 @@ public class kelondroRecords { ", cacheFlush=" + cacheFlush; } - private static int[] cacheCombinedStatus(int[] a, int[] b) { + private final static int[] cacheCombinedStatus(int[] a, int[] b) { int[] c = new int[a.length]; for (int i = a.length - 1; i >= 0; i--) c[i] = a[i] + b[i]; return c; } - public static int[] cacheCombinedStatus(int[][] a, int l) { + public final static int[] cacheCombinedStatus(int[][] a, int l) { if ((a == null) || (a.length == 0) || (l == 0)) return null; if ((a.length >= 1) && (l == 1)) return a[0]; if ((a.length >= 2) && (l == 2)) return cacheCombinedStatus(a[0], a[1]); return cacheCombinedStatus(cacheCombinedStatus(a, l - 1), a[l - 1]); } - public byte[] bulkRead(int start, int end) throws IOException { + public final byte[] bulkRead(int start, int end) throws IOException { // a bulk read simply reads a piece of memory from the record file // this makes only sense if there are no overhead bytes or pointer // the end value is OUTSIDE the record interval @@ -519,19 +521,19 @@ public class kelondroRecords { return bulk; } - protected Node newNode() throws IOException { + protected final Node newNode() throws IOException { return new Node(); } - protected Node getNode(Handle handle) throws IOException { + protected final Node getNode(Handle handle) throws IOException { return getNode(handle, null, 0); } - protected Node getNode(Handle handle, Node parentNode, int referenceInParent) throws IOException { + protected final Node getNode(Handle handle, Node parentNode, int referenceInParent) throws IOException { return new Node(handle, parentNode, referenceInParent); } - protected void deleteNode(Handle handle) throws IOException { + protected final void deleteNode(Handle handle) throws IOException { if (cacheSize != 0) { synchronized (cacheHeaders) { cacheHeaders.removeb(handle.index); @@ -570,13 +572,13 @@ public class kelondroRecords { //private byte[] ohBytes = null; // the overhead bytes, OHBYTEC values //private Handle[] ohHandle= null; // the overhead handles, OHHANDLEC values //private byte[][] values = null; // an array of byte[] nodes is the value vector - private Handle handle = null; // index of the entry, by default NUL means undefined - private byte[] headChunk = null; // contains ohBytes, ohHandles and the key value - private byte[] tailChunk = null; // contains all values except the key value - private boolean headChanged = true; - private boolean tailChanged = true; + protected Handle handle = null; // index of the entry, by default NUL means undefined + protected byte[] headChunk = null; // contains ohBytes, ohHandles and the key value + protected byte[] tailChunk = null; // contains all values except the key value + protected boolean headChanged = true; + protected boolean tailChanged = true; - private Node() throws IOException { + protected Node() throws IOException { // create a new empty node and reserve empty space in file for it // use this method only if you want to extend the file with new entries // without the need to have content in it. @@ -589,7 +591,7 @@ public class kelondroRecords { for (int i = tailchunksize - 1; i >= 0; i--) this.tailChunk[i] = 0; } - private Node(Handle handle, byte[] bulkchunk, int offset) throws IOException { + protected Node(Handle handle, byte[] bulkchunk, int offset) { // this initializer is used to create nodes from bulk-read byte arrays this.handle = handle; @@ -599,23 +601,8 @@ public class kelondroRecords { System.arraycopy(bulkchunk, offset, this.headChunk, 0, headchunksize); System.arraycopy(bulkchunk, offset + headchunksize, this.tailChunk, 0, tailchunksize); } - /* - private Node(Handle handle) throws IOException { - // this creates an entry with an pre-reserved entry position - // values can be written using the setValues() method - // but we expect that values are already there in the file ready to - // be read which we do not here - if (handle == null) throw new kelondroException(filename, "INTERNAL ERROR: node handle is null."); - if (handle.index >= USAGE.allCount()) throw new kelondroException(filename, "INTERNAL ERROR: node handle index exceeds size."); - - // use given handle - this.handle = new Handle(handle.index); - - // init the content - initContent(); - } - */ - private Node(Handle handle, Node parentNode, int referenceInParent) throws IOException { + + protected Node(Handle handle, Node parentNode, int referenceInParent) throws IOException { // this creates an entry with an pre-reserved entry position. // values can be written using the setValues() method, // but we expect that values are already there in the file. @@ -626,16 +613,13 @@ public class kelondroRecords { // the parentNode can be given if an auto-fix in the following case is wanted if (handle == null) throw new kelondroException(filename, "INTERNAL ERROR: node handle is null."); if (handle.index >= USAGE.allCount()) { - if (parentNode == null) { - throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. No auto-fix node was submitted. This is a serious failure."); - } else { - try { - parentNode.setOHHandle(referenceInParent, null); - parentNode.commit(CP_NONE); - logWarning("INTERNAL ERROR, Node/init in " + filename + ": node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed"); - } catch (IOException ee) { - throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage()); - } + if (parentNode == null) throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. No auto-fix node was submitted. This is a serious failure."); + try { + parentNode.setOHHandle(referenceInParent, null); + parentNode.commit(CP_NONE); + logWarning("INTERNAL ERROR, Node/init in " + filename + ": node handle index " + handle.index + " exceeds size. The bad node has been auto-fixed"); + } catch (IOException ee) { + throw new kelondroException(filename, "INTERNAL ERROR, Node/init: node handle index " + handle.index + " exceeds size. It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage()); } } @@ -740,7 +724,7 @@ public class kelondroRecords { public byte[] setValueRow(byte[] row) throws IOException { // if the index is defined, then write values directly to the file, else only to the object - if (row.length != ROW.objectsize()) throw new IOException("setValueRow with wrong (" + row.length + ") row length instead correct: " + ROW.objectsize()); + if ((row != null) && (row.length != ROW.objectsize())) throw new IOException("setValueRow with wrong (" + row.length + ") row length instead correct: " + ROW.objectsize()); byte[] result = getValueRow(); // previous value (this loads the values if not already happened) // set values @@ -848,23 +832,17 @@ public class kelondroRecords { private boolean cacheSpace() { // check for space in cache - // should be only called within a synchronized(XcacheHeaders) environment + // should be only called within a synchronized(cacheHeaders) environment // returns true if it is allowed to add another entry to the cache // returns false if the cache is considered to be full if (cacheSize == 0) return false; // no caching if (cacheHeaders.size() == 0) return true; // nothing there to flush if ((cacheHeaders.size() < cacheSize) && (availableMemory() >= memBlock)) return true; // no need to flush cache space - // delete one entry. distinguish between different priority cases: - if (cacheHeaders.size() != 0) { - // just delete any of the entries - cacheHeaders.removeOne(); - cacheFlush++; - return true; - } else { - // we cannot delete any entry, therefore there is no space for another entry - return false; - } + // just delete any of the entries + cacheHeaders.removeOne(); + cacheFlush++; + return true; } private void updateNodeCache(int priority) { @@ -894,12 +872,12 @@ public class kelondroRecords { protected void printCache() { if (cacheSize == 0) { System.out.println("### file report: " + size() + " entries"); - for (int i = 0; i < size() + 3; i++) { + for (int i = 0; i < USAGE.allCount(); i++) { // print from file to compare System.out.print("#F " + i + ": "); try { for (int j = 0; j < headchunksize; j++) - System.out.print(entryFile.readByte(j + seekpos(new Handle(i))) + ","); + System.out.print(Integer.toHexString(0xff & entryFile.readByte(j + seekpos(new Handle(i)))) + " "); } catch (IOException e) {} System.out.println(); @@ -955,31 +933,31 @@ public class kelondroRecords { } } - private final long seekpos(Handle handle) { + protected final long seekpos(Handle handle) { assert (handle.index >= 0): "handle index too low: " + handle.index; assert (handle.index < USAGE.allCount()): "handle index too high:" + handle.index; return POS_NODES + ((long) recordsize * handle.index); } // additional properties - public synchronized int handles() { + public final synchronized int handles() { return this.HANDLES.length; } - protected void setHandle(int pos, Handle handle) throws IOException { + protected final void setHandle(int pos, Handle handle) throws IOException { if (pos >= HANDLES.length) throw new IllegalArgumentException("setHandle: handle array exceeded"); if (handle == null) handle = new Handle(NUL); HANDLES[pos] = handle; entryFile.writeInt(POS_HANDLES + 4 * pos, handle.index); } - protected Handle getHandle(int pos) { + protected final Handle getHandle(int pos) { if (pos >= HANDLES.length) throw new IllegalArgumentException("getHandle: handle array exceeded"); return (HANDLES[pos].index == NUL) ? null : HANDLES[pos]; } // custom texts - public void setText(int pos, byte[] text) throws IOException { + public final void setText(int pos, byte[] text) throws IOException { if (pos >= TXTPROPS.length) throw new IllegalArgumentException("setText: text array exceeded"); if (text.length > TXTPROPW) throw new IllegalArgumentException("setText: text lemgth exceeded"); if (text == null) text = new byte[0]; @@ -987,26 +965,26 @@ public class kelondroRecords { entryFile.write(POS_TXTPROPS + TXTPROPW * pos, text); } - public byte[] getText(int pos) { + public final byte[] getText(int pos) { if (pos >= TXTPROPS.length) throw new IllegalArgumentException("getText: text array exceeded"); return TXTPROPS[pos]; } // Returns true if this map contains no key-value mappings. - public boolean isEmpty() { + public final boolean isEmpty() { return (USAGE.USEDC == 0); } // Returns the number of key-value mappings in this map. - public int size() { + public final int size() { return USAGE.USEDC; } - protected int free() { + protected final int free() { return USAGE.FREEC; } - private void dispose(Handle h) throws IOException { + private final void dispose(Handle h) throws IOException { // delete element with handle h // this element is then connected to the deleted-chain and can be // re-used change counter @@ -1021,22 +999,17 @@ public class kelondroRecords { } } - public Iterator contentRows(long maxInitTime) throws kelondroException { - // returns an iterator of kelondroRow.Entry-objects that are not marked as 'deleted' - try { - return new contentRowIterator(maxInitTime); - } catch (IOException e) { - return new HashSet().iterator(); - } + public final Iterator contentRows(long maxInitTime) throws kelondroException { + return new contentRowIterator(maxInitTime); } - public class contentRowIterator implements Iterator { + public final class contentRowIterator implements Iterator { // iterator that iterates all kelondroRow.Entry-objects in the file // all records that are marked as deleted are ommitted private Iterator nodeIterator; - public contentRowIterator(long maxInitTime) throws IOException { + public contentRowIterator(long maxInitTime) { nodeIterator = contentNodes(maxInitTime); } @@ -1058,7 +1031,7 @@ public class kelondroRecords { } - protected Iterator contentNodes(long maxInitTime) throws kelondroException { + protected final Iterator contentNodes(long maxInitTime) throws kelondroException { // returns an iterator of Node-objects that are not marked as 'deleted' try { return new contentNodeIterator(maxInitTime); @@ -1067,40 +1040,46 @@ public class kelondroRecords { } } - protected class contentNodeIterator implements Iterator { + protected final Set deletedHandles(long maxTime) throws kelondroException, IOException { + // initialize set with deleted nodes; the set contains Handle-Objects + // this may last only the given maxInitTime + // if the initTime is exceeded, the method throws an kelondroException + TreeSet markedDeleted = new TreeSet(); + long timeLimit = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; + long seekp; + synchronized (USAGE) { + if (USAGE.FREEC != 0) { + Handle h = USAGE.FREEH; + while (h.index != NUL) { + //System.out.println("handle=0x" + Integer.toHexString(h.index)); + markedDeleted.add(h); + seekp = seekpos(h); + if (seekp > entryFile.length()) throw new kelondroException("deletedHandles: seek position " + seekp + "/" + h.index + " out of file size " + entryFile.length() + "/" + ((entryFile.length() - POS_NODES) / recordsize)); + h = new Handle(entryFile.readInt(seekp)); + if (System.currentTimeMillis() > timeLimit) throw new kelondroException(filename, "time limit of " + maxTime + " exceeded; > " + markedDeleted.size() + " deleted entries"); + } + } + } + return markedDeleted; + } + + protected final class contentNodeIterator implements Iterator { // iterator that iterates all Node-objects in the file // all records that are marked as deleted are ommitted // this is probably also the fastest way to iterate all objects - private HashSet markedDeleted; + private Set markedDeleted; private Handle pos; private byte[] bulk; private int bulksize; private int bulkstart; // the offset of the bulk array to the node position public contentNodeIterator(long maxInitTime) throws IOException, kelondroException { - pos = new Handle(0); - - // initialize set with deleted nodes - // this may last only the given maxInitTime - // if the initTime is exceeded, the method throws an kelondroException - markedDeleted = new HashSet(); - long timeLimit = (maxInitTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxInitTime; - long seekp; - synchronized (USAGE) { - if (USAGE.FREEC != 0) { - Handle h = USAGE.FREEH; - while (h.index != NUL) { - markedDeleted.add(h); - seekp = seekpos(h); - if (seekp > entryFile.length()) throw new kelondroException("contentNodeIterator: seek position " + seekp + "/" + h.index + " out of file size " + entryFile.length() + "/" + ((entryFile.length() - POS_NODES) / recordsize)); - h = new Handle(entryFile.readInt(seekp)); - if (System.currentTimeMillis() > timeLimit) throw new kelondroException(filename, "time limit of " + maxInitTime + " exceeded; > " + markedDeleted.size() + " deleted entries"); - } - } - } + // initialize markedDeleted set of deleted Handles + markedDeleted = deletedHandles(maxInitTime); // seek first position according the delete node set + pos = new Handle(0); while ((markedDeleted.contains(pos)) && (pos.index < USAGE.allCount())) pos.index++; // initialize bulk @@ -1132,18 +1111,7 @@ public class kelondroRecords { throw new kelondroException(filename, e.getMessage()); } } - /* - public Object next() { - try { - Node n = new Node(pos); - pos.index++; - while ((markedDeleted.contains(pos)) && (pos.index < USAGE.allCount())) pos.index++; - return n; - } catch (IOException e) { - throw new kelondroException(filename, e.getMessage()); - } - } - */ + public void remove() { throw new UnsupportedOperationException(); } @@ -1161,7 +1129,7 @@ public class kelondroRecords { } catch (IOException e) { } } - protected static String[] line2args(String line) { + protected final static String[] line2args(String line) { // parse the command line if ((line == null) || (line.length() == 0)) return null; String args[]; @@ -1175,7 +1143,7 @@ public class kelondroRecords { return args; } - protected static boolean equals(byte[] a, byte[] b) { + protected final static boolean equals(byte[] a, byte[] b) { if (a == b) return true; if ((a == null) || (b == null)) return false; if (a.length != b.length) return false; @@ -1231,6 +1199,7 @@ public class kelondroRecords { System.out.println(" USEDC : " + USAGE.USEDC); System.out.println(" FREEC : " + USAGE.FREEC); System.out.println(" FREEH : " + USAGE.FREEH.toString()); + System.out.println(" NUL repres.: 0x" + Integer.toHexString(NUL)); System.out.println(" Data Offset: 0x" + Long.toHexString(POS_NODES)); System.out.println("--"); System.out.println("RECORDS"); @@ -1240,11 +1209,22 @@ public class kelondroRecords { System.out.println(" Overhead : " + this.overhead + " bytes (" + OHBYTEC + " OH bytes, " + OHHANDLEC + " OH Handles)"); System.out.println(" Recordsize : " + this.recordsize + " bytes"); System.out.println("--"); - printCache(); - System.out.println("--"); - + System.out.println("DELETED HANDLES"); + Set dh = deletedHandles(-1); + Iterator dhi = dh.iterator(); + Handle h; + while (dhi.hasNext()) { + h = (Handle) dhi.next(); + System.out.print(h.index + ", "); + } + System.out.println("\n--"); if (!(records)) return; + // print also all records + System.out.println("CACHE"); + printCache(); + System.out.println("--"); + System.out.println("NODES"); for (int i = 0; i < USAGE.allCount(); i++) System.out.println("NODE: " + new Node(new Handle(i), (Node) null, 0).toString()); } @@ -1253,10 +1233,10 @@ public class kelondroRecords { return size() + " RECORDS IN FILE " + filename; } - protected class Handle implements Comparable { - private int index; + protected final class Handle implements Comparable { + protected int index; - private Handle() throws IOException { + protected Handle() throws IOException { // reserves a new record and returns index of record // the return value is not a seek position // the seek position can be retrieved using the seekpos() function diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index dd30ea701..1ff0a0f80 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -35,10 +35,10 @@ import java.util.StringTokenizer; public class kelondroRow { - private kelondroColumn[] row; + protected kelondroColumn[] row; protected int[] colstart; - private int objectsize; - private Map nickref = null; + protected int objectsize; + protected Map nickref = null; public kelondroRow(kelondroColumn[] row) { this.row = row; @@ -79,7 +79,7 @@ public class kelondroRow { } } - private void genNickRef() { + protected void genNickRef() { if (nickref != null) return; nickref = new HashMap(row.length); for (int i = 0; i < row.length; i++) nickref.put(row[i].nickname(), new Object[]{row[i], new Integer(colstart[i])}); @@ -97,8 +97,8 @@ public class kelondroRow { return row[col]; } - public int width(int row) { - return this.row[row].cellwidth(); + public int width(int column) { + return this.row[column].cellwidth(); } public int[] widths() { @@ -216,15 +216,25 @@ public class kelondroRow { return rowinstance[colstart[column]] == 0; } + public void setCol(String nickname, byte[] cell) { + if (nickref == null) genNickRef(); + Object[] ref = (Object[]) nickref.get(nickname); + if (ref == null) return; + kelondroColumn col = (kelondroColumn) ref[0]; + setCol(col.encoder(), ((Integer) ref[1]).intValue(), col.cellwidth(), cell); + } + public void setCol(int column, byte[] cell) { - int valuewidth = row[column].cellwidth(); - int targetoffset = colstart[column]; + setCol(row[column].encoder(), colstart[column], row[column].cellwidth(), cell); + } + + private void setCol(int encoding, int offset, int length, byte[] cell) { if (cell == null) { - while (valuewidth-- > 0) rowinstance[targetoffset + valuewidth] = 0; + while (length-- > 0) rowinstance[offset + length] = 0; } else { - System.arraycopy(cell, 0, rowinstance, targetoffset, Math.min(cell.length, valuewidth)); // error? - if (cell.length < valuewidth) { - while (valuewidth-- > cell.length) rowinstance[targetoffset + valuewidth] = 0; + System.arraycopy(cell, 0, rowinstance, offset, Math.min(cell.length, length)); + if (cell.length < length) { + while (length-- > cell.length) rowinstance[offset + length] = 0; } } } @@ -244,32 +254,75 @@ public class kelondroRow { } } + public void setCol(String nick, String cell, String encoding) { + if (encoding == null) + setCol(nick, cell.getBytes()); + else + try { + setCol(nick, cell.getBytes(encoding)); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + } + + public void setCol(String nickname, long cell) { + if (nickref == null) genNickRef(); + Object[] ref = (Object[]) nickref.get(nickname); + if (ref == null) return; + kelondroColumn col = (kelondroColumn) ref[0]; + setCol(col.encoder(), ((Integer) ref[1]).intValue(), col.cellwidth(), cell); + } + public void setCol(int column, long cell) { // uses the column definition to choose the right encoding - switch (row[column].encoder()) { + setCol(row[column].encoder(), colstart[column], row[column].cellwidth(), cell); + } + + private void setCol(int encoder, int offset, int length, long cell) { + switch (encoder) { case kelondroColumn.encoder_none: throw new kelondroException("ROW", "setColLong has celltype none, no encoder given"); case kelondroColumn.encoder_b64e: - kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); + kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, offset, length); break; case kelondroColumn.encoder_b256: - kelondroNaturalOrder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); + kelondroNaturalOrder.encodeLong(cell, rowinstance, offset, length); break; case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "setColLong of celltype bytes not applicable"); } } + public byte[] getCol(String nickname, byte[] dflt) { + if (nickref == null) genNickRef(); + Object[] ref = (Object[]) nickref.get(nickname); + if (ref == null) return dflt; + kelondroColumn col = (kelondroColumn) ref[0]; + byte[] cell = new byte[col.cellwidth()]; + System.arraycopy(rowinstance, ((Integer) ref[1]).intValue(), cell, 0, cell.length); + return cell; + } + + public String getColString(String nickname, String dflt, String encoding) { + if (nickref == null) genNickRef(); + Object[] ref = (Object[]) nickref.get(nickname); + if (ref == null) return dflt; + kelondroColumn col = (kelondroColumn) ref[0]; + return getColString(col.encoder(), ((Integer) ref[1]).intValue(), col.cellwidth(), encoding); + } + public String getColString(int column, String encoding) { - int length = row[column].cellwidth(); - int offset = colstart[column]; + return getColString(row[column].encoder(), colstart[column], row[column].cellwidth(), encoding); + } + + private String getColString(int encoder, int offset, int length, String encoding) { if (rowinstance[offset] == 0) return null; if (length > rowinstance.length - offset) length = rowinstance.length - offset; while ((length > 0) && (rowinstance[offset + length - 1] == 0)) length--; if (length == 0) return null; try { if ((encoding == null) || (encoding.length() == 0)) - return new String (rowinstance, offset, length); + return new String(rowinstance, offset, length); else return new String(rowinstance, offset, length, encoding); } catch (UnsupportedEncodingException e) { @@ -277,15 +330,28 @@ public class kelondroRow { } } + public long getColLong(String nickname, long dflt) { + if (nickref == null) genNickRef(); + Object[] ref = (Object[]) nickref.get(nickname); + if (ref == null) return dflt; + kelondroColumn col = (kelondroColumn) ref[0]; + int colstart = ((Integer) ref[1]).intValue(); + return getColLong(col.encoder(), colstart, col.cellwidth()); + } + public long getColLong(int column) { // uses the column definition to choose the right encoding - switch (row[column].encoder()) { + return getColLong(row[column].encoder(), colstart[column], row[column].cellwidth()); + } + + public long getColLong(int encoder, int offset, int length) { + switch (encoder) { case kelondroColumn.encoder_none: throw new kelondroException("ROW", "getColLong has celltype none, no encoder given"); case kelondroColumn.encoder_b64e: - return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); + return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, offset, length); case kelondroColumn.encoder_b256: - return kelondroNaturalOrder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); + return kelondroNaturalOrder.decodeLong(rowinstance, offset, length); case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "getColLong of celltype bytes not applicable"); } diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 48257e7fd..c2ae18b28 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -280,7 +280,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd return -1; } - private int binaryPosition(byte[] key, int astart, int alength) { + public int binaryPosition(byte[] key, int astart, int alength) { // returns the exact position of the key if the key exists, // or a position of an entry that is greater than the key if the // key does not exist diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java index c381821ee..d8ec9800b 100644 --- a/source/de/anomic/kelondro/kelondroStack.java +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -62,10 +62,10 @@ public final class kelondroStack extends kelondroRecords { private static short thisFHandles = 2; // two file handles for root handle and handle to last lement // define pointers for OH array access - private static int left = 0; // pointer for OHHandle-array: handle()-Value of left child Node - private static int right = 1; // pointer for OHHandle-array: handle()-Value of right child Node - private static int root = 0; // pointer for FHandles-array: pointer to root node - private static int toor = 1; // pointer for FHandles-array: pointer to root node + protected static final int left = 0; // pointer for OHHandle-array: handle()-Value of left child Node + protected static final int right = 1; // pointer for OHHandle-array: handle()-Value of right child Node + protected static final int root = 0; // pointer for FHandles-array: pointer to root node + protected static final int toor = 1; // pointer for FHandles-array: pointer to root node public kelondroStack(File file, kelondroRow rowdef, boolean exitOnFail) { // this creates a new stack @@ -98,7 +98,7 @@ public final class kelondroStack extends kelondroRecords { kelondroRow row = stack.row(); // close and delete the file - try {stack.close();} catch (Exception e) {}; + try {stack.close();} catch (Exception e) {} if (f.exists()) f.delete(); // re-open a database with same settings as before diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 9b175dd1b..3cc051b40 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -63,33 +63,33 @@ import java.util.Map; public class kelondroTree extends kelondroRecords implements kelondroIndex { // logging (This probably needs someone to initialize the java.util.logging.* facilities); - public static Logger log = Logger.getLogger("KELONDRO"); + public static final Logger log = Logger.getLogger("KELONDRO"); // define the Over-Head-Array - private static short thisOHBytes = 2; // our record definition of two bytes - private static short thisOHHandles = 3; // and three handles overhead - private static short thisFHandles = 1; // file handles: one for a root pointer + protected static final short thisOHBytes = 2; // our record definition of two bytes + protected static final short thisOHHandles = 3; // and three handles overhead + protected static final short thisFHandles = 1; // file handles: one for a root pointer // define pointers for OH array access - private static int magic = 0; // pointer for OHByte-array: marker for Node purpose; defaults to 1 - private static int balance = 1; // pointer for OHByte-array: balance value of tree node; balanced = 0 + protected static final int magic = 0; // pointer for OHByte-array: marker for Node purpose; defaults to 1 + protected static final int balance = 1; // pointer for OHByte-array: balance value of tree node; balanced = 0 - private static int parent = 0; // pointer for OHHandle-array: handle()-Value of parent Node - private static int leftchild = 1; // pointer for OHHandle-array: handle()-Value of left child Node - private static int rightchild = 2; // pointer for OHHandle-array: handle()-Value of right child Node + protected static final int parent = 0; // pointer for OHHandle-array: handle()-Value of parent Node + protected static final int leftchild = 1; // pointer for OHHandle-array: handle()-Value of left child Node + protected static final int rightchild = 2; // pointer for OHHandle-array: handle()-Value of right child Node - private static int root = 0; // pointer for FHandles-array: pointer to root node + protected static final int root = 0; // pointer for FHandles-array: pointer to root node // calibration of cache - public static int defaultObjectCachePercent = 30; + public static final int defaultObjectCachePercent = 30; // class variables - private Search writeSearchObj = new Search(); - protected kelondroOrder objectOrder = new kelondroNaturalOrder(true); - private final kelondroOrder loopDetectionOrder = new kelondroNaturalOrder(true); - private int readAheadChunkSize = 100; - private long lastIteratorCount = readAheadChunkSize; - private kelondroObjectCache objectCache; + private final Search writeSearchObj = new Search(); + protected kelondroOrder objectOrder = new kelondroNaturalOrder(true); + protected kelondroOrder loopDetectionOrder = new kelondroNaturalOrder(true); + protected int readAheadChunkSize = 100; + protected long lastIteratorCount = readAheadChunkSize; + private kelondroObjectCache objectCache; public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, boolean exitOnFail) { // this creates a new tree file @@ -171,13 +171,13 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { public String[] cacheObjectStatus() { if (this.objectCache == null) return null; - else return this.objectCache.status(); + return this.objectCache.status(); } private void writeOrderType() { try { super.setDescription(objectOrder.signature().getBytes()); - } catch (IOException e) {}; + } catch (IOException e) {} } private void readOrderType() { @@ -185,7 +185,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { byte[] d = super.getDescription(); String s = new String(d).substring(0, 2); this.objectOrder = orderBySignature(s); - } catch (IOException e) {}; + } catch (IOException e) {} } public static kelondroOrder orderBySignature(String signature) { @@ -292,8 +292,8 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { if (k == null) { found = false; return; - } else { - if (visitedNodeKeys.contains(k)) { + } + if (visitedNodeKeys.contains(k)) { // we have loops in the database. // to fix this, all affected nodes must be patched thenode.setOHByte(magic, (byte) 1); @@ -305,7 +305,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { logWarning("kelondroTree.Search.process: database contains loops; the loop-nodes have been auto-fixed"); found = false; return; - } + } // System.out.println("Comparing key = '" + new String(key) + "' with '" + otherkey + "':"); // debug c = objectOrder.compare(key, k); // System.out.println(c); // debug @@ -321,7 +321,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { thisHandle = thenode.getOHHandle(rightchild); } visitedNodeKeys.add(k); - } } } // System.out.println("DEBUG: search for " + new String(key) + " ended with status=" + ((found) ? "found" : "not-found") + ", node=" + ((thenode == null) ? "NULL" : thenode.toString()) + ", parent=" + ((parentnode == null) ? "NULL" : parentnode.toString())); @@ -336,26 +335,27 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { public Node getMatcher() { if (found) return thenode; - else throw new IllegalArgumentException("wrong access of matcher"); + throw new IllegalArgumentException("wrong access of matcher"); } public Node getParent() { - if (found) return parentnode; else return thenode; + if (found) return parentnode; + return thenode; } public boolean isRoot() { if (found) throw new IllegalArgumentException("wrong access of isRoot"); - else return (child == 0); + return (child == 0); } public boolean isLeft() { if (found) throw new IllegalArgumentException("wrong access of leftchild"); - else return (child == -1); + return (child == -1); } public boolean isRight() { if (found) throw new IllegalArgumentException("wrong access of leftchild"); - else return (child == 1); + return (child == 1); } } @@ -493,19 +493,14 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { break; } break; - } else { - // crawl up the tree - if (parentNode.getOHHandle(parent) == null) { - // root reached: stop - break; - } else { - theNode = parentNode; - parentNode = getNode(parentNode.getOHHandle(parent), null, 0); - } } + // crawl up the tree + if (parentNode.getOHHandle(parent) == null) break; // root reached: stop + theNode = parentNode; + parentNode = getNode(parentNode.getOHHandle(parent), null, 0); } - result = null;; // that means: no previous stored value present + result = null; // that means: no previous stored value present } } //writeLock.release(); @@ -557,11 +552,13 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { } private static byte max0(byte b) { - if (b > 0) return b; else return 0; + if (b > 0) return b; + return 0; } private static byte min0(byte b) { - if (b < 0) return b; else return 0; + if (b < 0) return b; + return 0; } private void LL_RightRotation(Node parentNode, Node childNode) throws IOException { @@ -754,15 +751,15 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { deleteNode(node.handle()); } - private Node firstNode() throws IOException { - Handle h = getHandle(root); - if (h == null) return null; - return firstNode(getNode(h, null, 0)); + protected Node firstNode() throws IOException { + Handle h = getHandle(root); + if (h == null) return null; + return firstNode(getNode(h, null, 0)); } - private Node firstNode(Node node) throws IOException { + protected Node firstNode(Node node) throws IOException { if (node == null) throw new IllegalArgumentException("firstNode: node=null"); - Handle h = node.getOHHandle(leftchild); + Handle h = node.getOHHandle(leftchild); HashSet visitedNodeKeys = new HashSet(); // to detect loops String nodeKey; while (h != null) { @@ -776,22 +773,22 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { return node; } h = node.getOHHandle(leftchild); - } - return node; + } + return node; } - private Node lastNode() throws IOException { - Handle h = getHandle(root); - if (h == null) return null; - return lastNode(getNode(h, null, 0)); + protected Node lastNode() throws IOException { + Handle h = getHandle(root); + if (h == null) return null; + return lastNode(getNode(h, null, 0)); } - private Node lastNode(Node node) throws IOException { - if (node == null) throw new IllegalArgumentException("lastNode: node=null"); + protected Node lastNode(Node node) throws IOException { + if (node == null) throw new IllegalArgumentException("lastNode: node=null"); Handle h = node.getOHHandle(rightchild); HashSet visitedNodeKeys = new HashSet(); // to detect loops String nodeKey; - while (h != null) { + while (h != null) { try { node = getNode(h, node, rightchild); nodeKey = new String(node.getKey()); @@ -802,8 +799,8 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { return node; } h = node.getOHHandle(rightchild); - } - return node; + } + return node; } private class nodeIterator implements Iterator { @@ -1140,14 +1137,16 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { private int height(Node node) throws IOException { if (node == null) return 0; Handle h = node.getOHHandle(leftchild); - int hl = (h == null) ? 0 : height(getNode(h, node, leftchild)); + int hl = (h == null) ? 0 : height(getNode(h, node, leftchild)); h = node.getOHHandle(rightchild); int hr = (h == null) ? 0 : height(getNode(h, node, rightchild)); - if (hl > hr) return hl + 1; else return hr + 1; + if (hl > hr) return hl + 1; + return hr + 1; } public String np(Object n) { - if (n == null) return "NULL"; else return n.toString(); + if (n == null) return "NULL"; + return n.toString(); } public void print() throws IOException { diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index 6534e15b1..aa82532b5 100644 --- a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -470,11 +470,11 @@ public final class plasmaCondenser { writer.close(); } - private static boolean punctuation(char c) { + protected final static boolean punctuation(char c) { return ("!?.".indexOf(c) >= 0); } - public static boolean invisible(char c) { + public final static boolean invisible(char c) { if ((c < ' ') || (c > 'z')) return true; return ("$%&/()=\"$%&/()=`^+*~#'-_:;,|<>[]\\".indexOf(c) >= 0); } diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 06c48707d..a1239185a 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -458,7 +458,7 @@ public class plasmaCrawlNURL extends indexURL { private int forkfactor; // sum of anchors of all ancestors private bitfield flags; private int handle; - private boolean stored;; + private boolean stored; public Entry(String initiator, URL url, diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java index d0c409d6c..df6b37c0a 100644 --- a/source/de/anomic/plasma/plasmaRankingCRProcess.java +++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java @@ -52,6 +52,11 @@ import java.util.Map; import de.anomic.kelondro.kelondroAttrSeq; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroCollectionIndex; +import de.anomic.kelondro.kelondroFlexTable; +import de.anomic.kelondro.kelondroIndex; +import de.anomic.kelondro.kelondroRow; +import de.anomic.kelondro.kelondroRowSet; import de.anomic.server.serverFileUtils; import de.anomic.server.serverDate; import de.anomic.tools.bitfield; @@ -63,7 +68,20 @@ public class plasmaRankingCRProcess { header.append("# Created=" + System.currentTimeMillis()); header.append((char) 13); header.append((char) 10); header.append("# Structure=,'=',,,,,,,,,,,'|',*"); header.append((char) 13); header.append((char) 10); header.append("# ---"); header.append((char) 13); header.append((char) 10); - */ + */ + + public static final kelondroRow CRG_accrow = new kelondroRow( + "byte[] Referee-12," + + "Cardinal UDate-3 {b64e}, Cardinal VDate-3 {b64e}, " + + "Cardinal LCount-2 {b64e}, Cardinal GCount-2 {b64e}, Cardinal ICount-2 {b64e}, Cardinal DCount-2 {b64e}, Cardinal TLength-3 {b64e}, " + + "Cardinal WACount-3 {b64e}, Cardinal WUCount-3 {b64e}, Cardinal Flags-1 {b64e}, " + + "Cardinal FUDate-3 {b64e}, Cardinal FDDate-3 {b64e}, Cardinal LUDate-3 {b64e}, " + + "Cardinal UCount-2 {b64e}, Cardinal PCount-2 {b64e}, Cardinal ACount-2 {b64e}, Cardinal VCount-2 {b64e}, Cardinal Vita-2 {b64e}"); + public static final kelondroRow CRG_colrow = new kelondroRow("byte[] Anchor-12"); + public static final String CRG_accname = "CRG-a-attr"; + public static final String CRG_seqname = "CRG-a-coli"; + public static final kelondroRow RCI_coli = new kelondroRow("byte[] RefereeDom-6"); + public static final String RCI_colname = "RCI-a-coli"; private static boolean accumulate_upd(File f, kelondroAttrSeq acc) { // open file @@ -96,7 +114,7 @@ public class plasmaRankingCRProcess { Vita = (int) acc_entry.getAttr("Vita", 0); // update counters and dates - acc_entry.setSeq(new_entry.getSeq()); // need to be checked + acc_entry.setSeq(new_entry.getSeqSet()); // need to be checked UCount++; // increase update counter PCount += (new_flags.get(1)) ? 1 : 0; @@ -111,7 +129,7 @@ public class plasmaRankingCRProcess { acc_entry.setAttr("Flags", (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(acc_flags.getBytes()))); } else { // initialize counters and dates - acc_entry = acc.newEntry(key, new_entry.getAttrs(), new_entry.getSeq()); + acc_entry = acc.newEntry(key, new_entry.getAttrs(), new_entry.getSeqSet()); FUDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // first update date FDDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack LUDate = (int) new_entry.getAttr("VDate", 0); @@ -138,7 +156,86 @@ public class plasmaRankingCRProcess { return true; } - public static void accumulate(File from_dir, File tmp_dir, File err_dir, File bkp_dir, File to_file, int max_files) throws IOException { + private static boolean accumulate_upd(File f, kelondroIndex acc, kelondroCollectionIndex seq) throws IOException { + // open file + kelondroAttrSeq source_cr = null; + try { + source_cr = new kelondroAttrSeq(f, false); + } catch (IOException e) { + return false; + } + + // put elements in accumulator file + Iterator el = source_cr.keys(); + String key; + kelondroAttrSeq.Entry new_entry; + kelondroRow.Entry acc_entry; + int FUDate, FDDate, LUDate, UCount, PCount, ACount, VCount, Vita; + bitfield acc_flags, new_flags; + while (el.hasNext()) { + key = (String) el.next(); + new_entry = source_cr.getEntry(key); + new_flags = new bitfield(kelondroBase64Order.enhancedCoder.encodeLong((long) new_entry.getAttr("Flags", 0), 1).getBytes()); + // enrich information with additional values + if ((acc_entry = acc.get(key.getBytes())) != null) { + FUDate = (int) acc_entry.getColLong("FUDate", 0); + FDDate = (int) acc_entry.getColLong("FDDate", 0); + LUDate = (int) acc_entry.getColLong("LUDate", 0); + UCount = (int) acc_entry.getColLong("UCount", 0); + PCount = (int) acc_entry.getColLong("PCount", 0); + ACount = (int) acc_entry.getColLong("ACount", 0); + VCount = (int) acc_entry.getColLong("VCount", 0); + Vita = (int) acc_entry.getColLong("Vita", 0); + + // update counters and dates + seq.put(key.getBytes(), new_entry.getSeqCollection()); // FIXME: old and new collection must be joined + + UCount++; // increase update counter + PCount += (new_flags.get(1)) ? 1 : 0; + ACount += (new_flags.get(2)) ? 1 : 0; + VCount += (new_flags.get(3)) ? 1 : 0; + + // 'OR' the flags + acc_flags = new bitfield(kelondroBase64Order.enhancedCoder.encodeLong(acc_entry.getColLong("Flags", 0), 1).getBytes()); + for (int i = 0; i < 6; i++) { + if (new_flags.get(i)) acc_flags.set(i, true); + } + acc_entry.setCol("Flags", (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(acc_flags.getBytes()))); + } else { + // initialize counters and dates + acc_entry = acc.row().newEntry(); + acc_entry.setCol("Referee", key, null); + for (int i = 1; i < acc.row().columns(); i++) { + acc_entry.setCol(i, new_entry.getAttr(acc.row().column(i).nickname(), 0)); + } + seq.put(key.getBytes(), new_entry.getSeqCollection()); + FUDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // first update date + FDDate = plasmaWordIndex.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack + LUDate = (int) new_entry.getAttr("VDate", 0); + UCount = 0; + PCount = (new_flags.get(1)) ? 1 : 0; + ACount = (new_flags.get(2)) ? 1 : 0; + VCount = (new_flags.get(3)) ? 1 : 0; + Vita = 0; + } + // make plausibility check? + + // insert into accumulator + acc_entry.setCol("FUDate", (long) FUDate); + acc_entry.setCol("FDDate", (long) FDDate); + acc_entry.setCol("LUDate", (long) LUDate); + acc_entry.setCol("UCount", (long) UCount); + acc_entry.setCol("PCount", (long) PCount); + acc_entry.setCol("ACount", (long) ACount); + acc_entry.setCol("VCount", (long) VCount); + acc_entry.setCol("Vita", (long) Vita); + acc.put(acc_entry); + } + + return true; + } + + public static void accumulate(File from_dir, File tmp_dir, File err_dir, File bkp_dir, File to_file, int max_files, boolean newdb) throws IOException { if (!(from_dir.isDirectory())) { System.out.println("source path " + from_dir + " is not a directory."); return; @@ -158,16 +255,23 @@ public class plasmaRankingCRProcess { // open target file kelondroAttrSeq acc = null; - if (!(to_file.exists())) { - acc = new kelondroAttrSeq("Global Ranking Accumulator File", + kelondroIndex newacc = null; + kelondroCollectionIndex newseq = null; + if (newdb) { + File path = to_file.getParentFile(); // path to storage place + newacc = new kelondroFlexTable(path, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow, true); + newseq = new kelondroCollectionIndex(path, CRG_seqname, 12, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, 2, CRG_colrow); + } else { + if (!(to_file.exists())) { + acc = new kelondroAttrSeq("Global Ranking Accumulator File", ",'='," + ",,,,,,,,,," + ",,,,,,,," + "'|',*", false); - acc.toFile(to_file); - } - acc = new kelondroAttrSeq(to_file, false); - + acc.toFile(to_file); + } + acc = new kelondroAttrSeq(to_file, false); + } // collect source files File source_file = null; String[] files = from_dir.list(); @@ -175,30 +279,47 @@ public class plasmaRankingCRProcess { for (int i = 0; i < max_files; i++) { // open file source_file = new File(from_dir, files[i]); - if (accumulate_upd(source_file, acc)) { - // move cr file to temporary folder - source_file.renameTo(new File(tmp_dir, files[i])); + if (newdb) { + if (accumulate_upd(source_file, newacc, newseq)) { + // move cr file to temporary folder + source_file.renameTo(new File(tmp_dir, files[i])); + } else { + // error case: the cr-file is not valid; move to error path + source_file.renameTo(new File(err_dir, files[i])); + } } else { - // error case: the cr-file is not valid; move to error path - source_file.renameTo(new File(err_dir, files[i])); + if (accumulate_upd(source_file, acc)) { + // move cr file to temporary folder + source_file.renameTo(new File(tmp_dir, files[i])); + } else { + // error case: the cr-file is not valid; move to error path + source_file.renameTo(new File(err_dir, files[i])); + } } } - // save accumulator to temporary file - File tmp_file; - if (to_file.toString().endsWith(".gz")) { - tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".tmp.gz"); - } else { - tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".tmp"); - } try { - acc.toFile(tmp_file); - // since this was successful, we remove the old file and move the new file to it - to_file.delete(); - tmp_file.renameTo(to_file); + if (newdb) { + newacc.close(); + newseq.close(); + } else { + // save accumulator to temporary file + File tmp_file; + if (to_file.toString().endsWith(".gz")) { + tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".tmp.gz"); + } else { + tmp_file = new File(to_file.toString() + "." + (System.currentTimeMillis() % 1000) + ".tmp"); + } + // store the file + acc.toFile(tmp_file); + // since this was successful, we remove the old file and move the new file to it + to_file.delete(); + tmp_file.renameTo(to_file); + } serverFileUtils.moveAll(tmp_dir, bkp_dir); } catch (IOException e) { // move previously processed files back + e.printStackTrace(); serverFileUtils.moveAll(tmp_dir, from_dir); } @@ -232,7 +353,7 @@ public class plasmaRankingCRProcess { cr_UDate = cr_entry.getAttr("UDate", 0); // loop over all anchors - Iterator j = cr_entry.getSeq().entrySet().iterator(); + Iterator j = cr_entry.getSeqSet().iterator(); Map.Entry entry; while (j.hasNext()) { // get domain of anchors @@ -243,7 +364,7 @@ public class plasmaRankingCRProcess { // update domain-specific entry rci_entry = rci.getEntry(anchorDom); if (rci_entry == null) rci_entry = rci.newEntry(anchorDom, false); - rci_entry.addSeq(referee, null); + rci_entry.addSeq(referee); // update Update-Date rci_UDate = rci_entry.getAttr("UDate", 0); @@ -268,11 +389,61 @@ public class plasmaRankingCRProcess { return count; } + public static int genrcix(File cr_path_in, File rci_path_out) throws IOException { + //kelondroFlexTable acc = new kelondroFlexTable(cr_path_in, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow, true); + kelondroCollectionIndex seq = new kelondroCollectionIndex(cr_path_in, CRG_seqname, 12, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, 2, CRG_colrow); + kelondroCollectionIndex rci = new kelondroCollectionIndex(rci_path_out, RCI_colname, 6, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, 2, RCI_coli); + + // loop over all referees + int count = 0; + int size = seq.size(); + long start = System.currentTimeMillis(); + long l; + final Iterator i = seq.keycollections(null, false); + Object[] keycollection; + String referee, refereeDom, anchor, anchorDom; + kelondroRowSet cr_entry, rci_entry; + while (i.hasNext()) { + keycollection = (Object[]) i.next(); + referee = new String((byte[]) keycollection[0]); + if (referee.length() == 6) refereeDom = referee; else refereeDom = referee.substring(6); + cr_entry = (kelondroRowSet) keycollection[1]; + + // loop over all anchors + Iterator j = cr_entry.rows(); + kelondroRow.Entry entry; + while (j.hasNext()) { + // get domain of anchors + entry = (kelondroRow.Entry) j.next(); + anchor = (String) entry.getColString(0, null); + if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6); + + // update domain-specific entry + rci_entry = rci.get(anchorDom.getBytes(), false); + if (rci_entry == null) rci_entry = new kelondroRowSet(RCI_coli); + rci_entry.add(refereeDom.getBytes()); + + // insert entry + rci.put(anchorDom.getBytes(), rci_entry); + } + count++; + if ((count % 1000) == 0) { + l = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("processed " + count + " citations, " + (count / l) + " per second, rci.size = " + rci.size() + ", " + ((size - count) / (count / l) / 60) + " minutes remaining; mem = " + Runtime.getRuntime().freeMemory()); + } + } + + // finished. write to file + seq.close(); + rci.close(); + return count; + } + public static void main(String[] args) { // java -classpath source de.anomic.plasma.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr try { if ((args.length == 5) && (args[0].equals("-accumulate"))) { - accumulate(new File(args[1]), new File(args[2]), new File(args[3]), new File(args[4]), new File(args[5]), Integer.parseInt(args[6])); + accumulate(new File(args[1]), new File(args[2]), new File(args[3]), new File(args[4]), new File(args[5]), Integer.parseInt(args[6]), true); } if ((args.length == 2) && (args[0].equals("-accumulate"))) { File root_path = new File(args[1]); @@ -291,7 +462,7 @@ public class plasmaRankingCRProcess { serverFileUtils.moveAll(from_dir, ready_dir); long start = System.currentTimeMillis(); int files = ready_dir.list().length; - accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file, 1000); + accumulate(ready_dir, tmp_dir, err_dir, acc_dir, to_file, 1000, true); long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); System.out.println("Finished accumulate for " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); } @@ -332,6 +503,17 @@ public class plasmaRankingCRProcess { long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); System.out.println("Finished recycling of " + files + " files in " + seconds + " seconds (" + (files / seconds) + " files/second)"); } + if ((args.length == 2) && (args[0].equals("-genrci"))) { + File root_path = new File(args[1]); + File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0"); + File rci_filedir = new File(root_path, "DATA/RANKING/GLOBAL/030_rci0"); + rci_filedir.mkdirs(); + long start = System.currentTimeMillis(); + int count = genrcix(cr_filedir, rci_filedir); + long seconds = java.lang.Math.max(1, (System.currentTimeMillis() - start) / 1000); + System.out.println("Completed RCI generation: " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)"); + } + /* if ((args.length == 2) && (args[0].equals("-genrci"))) { File root_path = new File(args[1]); File cr_filedir = new File(root_path, "DATA/RANKING/GLOBAL/020_con0"); @@ -345,6 +527,7 @@ public class plasmaRankingCRProcess { System.out.println("Completed RCI generation for input file " + cr_filenames[i] + ": " + count + " citation references in " + seconds + " seconds (" + (count / seconds) + " CR-records/second)"); } } + */ } catch (IOException e) { e.printStackTrace(); } diff --git a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java index a3df1323d..ebf6107f4 100644 --- a/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java +++ b/source/de/anomic/plasma/plasmaRankingRCIEvaluation.java @@ -73,7 +73,7 @@ public class plasmaRankingRCIEvaluation { while (i.hasNext()) { key = (String) i.next(); entry = rci.getEntry(key); - c = entry.getSeq().size(); + c = entry.getSeqSet().size(); if (c > maxcount) maxcount = c; count_key = new Integer(c); count_count = (Integer) counts.get(count_key); @@ -162,7 +162,7 @@ public class plasmaRankingRCIEvaluation { while (i.hasNext()) { key = (String) i.next(); entry = rci.getEntry(key); - ranked[orderIntoYBI(partition, entry.getSeq().size())].add(key); + ranked[orderIntoYBI(partition, entry.getSeqSet().size())].add(key); } return ranked; } diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 6634f43d5..4434294d0 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -216,7 +216,7 @@ public class plasmaSwitchboardQueue { this.referrerURL = null; } - public Entry(kelondroRow.Entry row) throws IOException { + public Entry(kelondroRow.Entry row) { long ims = row.getColLong(2); byte flags = row.getColByte(3); try { diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 43e351054..ac913f7b6 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -449,7 +449,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { - int removed = 0;; + int removed = 0; synchronized (ramCache) { removed += ramCache.removeEntries(wordHash, urlHashes, deleteComplete); if (removed == urlHashes.size()) return removed; diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index dc2eadc00..7132b1fd6 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -150,7 +150,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index pathc++; } } - while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {}; + while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {} } else { hierarchy.add(list); buffer = next0(); @@ -216,7 +216,7 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index public Object next() { String r = buffer; - while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {}; + while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {} return r; } diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index 670bf91be..ec80576b3 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -51,10 +51,10 @@ import de.anomic.http.httpc; public class loaderThreads { // global values for loader threads - private int timeout; - private String user; - private String password; - private httpRemoteProxyConfig remoteProxyConfig; + protected int timeout; + protected String user; + protected String password; + protected httpRemoteProxyConfig remoteProxyConfig; // management objects for collection of threads Hashtable threads; diff --git a/source/de/anomic/yacy/yacyDHTAction.java b/source/de/anomic/yacy/yacyDHTAction.java index 460eb992c..d1428ba77 100644 --- a/source/de/anomic/yacy/yacyDHTAction.java +++ b/source/de/anomic/yacy/yacyDHTAction.java @@ -51,8 +51,8 @@ import de.anomic.server.logging.serverLog; public class yacyDHTAction implements yacyPeerAction { - private yacySeedDB seedDB; - private kelondroMScoreCluster seedCrawlReady; + protected yacySeedDB seedDB; + protected kelondroMScoreCluster seedCrawlReady; public yacyDHTAction(yacySeedDB seedDB) { this.seedDB = seedDB; diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index a380151d8..abca91d4e 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -61,7 +61,7 @@ public class yacyNewsDB { private File path; private int bufferkb; private long preloadTime; - private kelondroTree news; + protected kelondroTree news; public yacyNewsDB(File path, int bufferkb, long preloadTime) { this.path = path; @@ -161,7 +161,7 @@ public class yacyNewsDB { } } - private static yacyNewsRecord b2r(kelondroRow.Entry b) { + protected final static yacyNewsRecord b2r(kelondroRow.Entry b) { if (b == null) return null; return new yacyNewsRecord( b.getColString(0, null), @@ -172,7 +172,7 @@ public class yacyNewsDB { ); } - private kelondroRow.Entry r2b(yacyNewsRecord r) { + protected final kelondroRow.Entry r2b(yacyNewsRecord r) { if (r == null) return null; String attributes = r.attributes().toString(); if (attributes.length() > yacyNewsRecord.attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + yacyNewsRecord.attributesMaxLength); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index c8b8de54a..e8c0893c1 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -89,9 +89,9 @@ public final class yacySeedDB { public static final String[] accFields = new String[] {yacySeed.LCOUNT, yacySeed.ICOUNT, yacySeed.ISPEED}; // class objects - private File seedActiveDBFile, seedPassiveDBFile, seedPotentialDBFile; + protected File seedActiveDBFile, seedPassiveDBFile, seedPotentialDBFile; - private kelondroMap seedActiveDB, seedPassiveDB, seedPotentialDB; + protected kelondroMap seedActiveDB, seedPassiveDB, seedPotentialDB; private int seedDBBufferKB; private long preloadTime; @@ -210,7 +210,7 @@ public final class yacySeedDB { return new kelondroMap(new kelondroDyn(seedDBFile, (seedDBBufferKB * 0x400) / 3, preloadTime / 3, commonHashLength, 480, '#', true), sortFields, accFields); } - private synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) { + protected synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) { // this is an emergency function that should only be used if any problem with the // seed.db is detected yacyCore.log.logFine("seed-db " + seedDBFile.toString() + " reset (on-the-fly)");