From bde748f68f126a13104658695a86905c35cc29fc Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 21 Jun 2006 14:16:34 +0000 Subject: [PATCH] - integrated buffer in RowCollectionSet which speeds up the new cache by factor 3. (but the new cache was 5 times slower than the old cache...) - removed old unused code git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2231 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../anomic/kelondro/kelondroBytesIntMap.java | 2 +- .../kelondro/kelondroFScoreCluster.java | 154 ------------- .../anomic/kelondro/kelondroIntBytesMap.java | 2 +- .../anomic/kelondro/kelondroMScoreIndex.java | 216 ------------------ .../de/anomic/kelondro/kelondroRecords.java | 10 +- .../kelondro/kelondroRowBufferedSet.java | 79 ++++--- 6 files changed, 55 insertions(+), 408 deletions(-) delete mode 100644 source/de/anomic/kelondro/kelondroFScoreCluster.java delete mode 100644 source/de/anomic/kelondro/kelondroMScoreIndex.java diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 86fbbcdef..70451da3c 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -24,7 +24,7 @@ package de.anomic.kelondro; -public class kelondroBytesIntMap extends kelondroRowSet { +public class kelondroBytesIntMap extends kelondroRowBufferedSet { public kelondroBytesIntMap(int keySize, int initSize) { super(new kelondroRow(new int[]{keySize, 4}), initSize); diff --git a/source/de/anomic/kelondro/kelondroFScoreCluster.java b/source/de/anomic/kelondro/kelondroFScoreCluster.java deleted file mode 100644 index cf41f3fc9..000000000 --- a/source/de/anomic/kelondro/kelondroFScoreCluster.java +++ /dev/null @@ -1,154 +0,0 @@ -// kelondroScore.java -// ----------------------- -// part of The Kelondro Database -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// last major change: 
28.09.2004 -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. 
- -/* - * This class manages counted words, - * in a word-count table. - * word counts can be increased, and the words can be enumerated - * in order of their count. - */ - - -package de.anomic.kelondro; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; - -public class kelondroFScoreCluster { - - private static final int wordlength = 32; - private static final int countlength = 6; - //private static final int nodesize = 4048; - private kelondroTree refcountDB; - private kelondroTree countrefDB; - - public kelondroFScoreCluster(File refcountDBfile, File countrefDBfile, kelondroOrder objectOrder, boolean exitOnFail) { - if ((refcountDBfile.exists()) && (countrefDBfile.exists())) try { - refcountDB = new kelondroTree(refcountDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent); - refcountDB.setText(0, kelondroBase64Order.enhancedCoder.encodeLong(0, countlength).getBytes()); // counter of all occurrences - countrefDB = new kelondroTree(countrefDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent); - countrefDB.setText(0, kelondroBase64Order.enhancedCoder.encodeLong(0, countlength).getBytes()); - } catch (IOException e) { - refcountDBfile.delete(); - countrefDBfile.delete(); - refcountDB = new kelondroTree(refcountDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent, new kelondroRow(new int[] {wordlength, countlength}), objectOrder, 1, countlength, exitOnFail); - countrefDB = new kelondroTree(countrefDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent, new kelondroRow(new int[] {countlength + wordlength, 4}), objectOrder, 1, countlength, exitOnFail); - } else if ((!(refcountDBfile.exists())) && (!(countrefDBfile.exists()))) { - refcountDB = new kelondroTree(refcountDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent, new kelondroRow(new int[] {wordlength, countlength}), objectOrder, 1, countlength, exitOnFail); - countrefDB = new kelondroTree(countrefDBfile, 0x100000L, kelondroTree.defaultObjectCachePercent, new 
kelondroRow(new int[] {countlength + wordlength, 4}), objectOrder, 1, countlength, exitOnFail); - } else { - if (exitOnFail) { - System.exit(-1); - } else { - throw new RuntimeException("both word/count db files must exists"); - } - } - } - - public void addScore(String word) throws IOException { - word = word.toLowerCase(); - kelondroRow.Entry record = refcountDB.get(word.getBytes()); - long c; - String cs; - if (record == null) { - // new entry - c = 0; - } else { - // delete old entry - c = record.getColLongB64E(1); - cs = kelondroBase64Order.enhancedCoder.encodeLong(c, countlength); - countrefDB.remove((cs + word).getBytes()); - c++; - } - cs = kelondroBase64Order.enhancedCoder.encodeLong(c, countlength); - refcountDB.put(word.getBytes(), cs.getBytes()); - countrefDB.put((cs + word).getBytes(), new byte[] {0,0,0,0}); - // increase overall counter - refcountDB.setText(0, kelondroBase64Order.enhancedCoder.encodeLong(getTotalCount() + 1, countlength).getBytes()); - } - - public long getTotalCount() { - return kelondroBase64Order.enhancedCoder.decodeLong(new String(refcountDB.getText(0))); - } - - public int getElementCount() { - return refcountDB.size(); - } - - public long getScore(String word) throws IOException { - word = word.toLowerCase(); - kelondroRow.Entry record = refcountDB.get(word.getBytes()); - if (record == null) { - return 0; - } else { - return record.getColLongB64E(1); - } - } - - public Iterator scores(boolean up) throws IOException { - // iterates '-' Strings - return new scoreIterator(up, false); - } - - private class scoreIterator implements Iterator { - // iteration of score objects - - Iterator iterator; - - public scoreIterator(boolean up, boolean rotating) throws IOException { - iterator = countrefDB.rows(up, rotating, null); - } - - public boolean hasNext() { - return iterator.hasNext(); - } - - public Object next() { - String s = new String(((kelondroRow.Entry) iterator.next()).getColString(0, null)); - return s.substring(countlength) + 
"-" + kelondroBase64Order.enhancedCoder.decodeLong(s.substring(0, countlength)); - } - - public void remove() { - } - - } -} diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index 5916904a8..104ab7156 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -24,7 +24,7 @@ package de.anomic.kelondro; -public class kelondroIntBytesMap extends kelondroRowSet { +public class kelondroIntBytesMap extends kelondroRowBufferedSet { public kelondroIntBytesMap(int payloadSize, int initSize) { super(new kelondroRow(new int[]{4, payloadSize}), initSize); diff --git a/source/de/anomic/kelondro/kelondroMScoreIndex.java b/source/de/anomic/kelondro/kelondroMScoreIndex.java deleted file mode 100644 index 29745f5dd..000000000 --- a/source/de/anomic/kelondro/kelondroMScoreIndex.java +++ /dev/null @@ -1,216 +0,0 @@ -// kelondroMScoreIndex.java -// ----------------------- -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// last major change: 28.09.2004 -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. 
- -package de.anomic.kelondro; - -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.TreeMap; - -public class kelondroMScoreIndex { - - private kelondroMScoreCluster scoreCluster; - private HashMap objects; // encnt/object - relation - private TreeMap handles; // encnt/encnt - relation, ordered by objects - private int encnt; - - protected class objcomp implements Comparator { - private HashMap os; - public objcomp(HashMap objs) { - os = objs; - } - public int compare(Object o1, Object o2) { - if (o1 instanceof Integer) o1 = os.get(o1); - if (o2 instanceof Integer) o2 = os.get(o2); - return ((Comparable) o1).compareTo(o2); - } - public boolean equals(Object obj) { - return false; - } - } - - public kelondroMScoreIndex() { - encnt = 0; - objects = new HashMap(); // storage space for values - handles = new TreeMap(new objcomp(objects)); // int-handle/value relation - scoreCluster = new kelondroMScoreCluster(); // scores for int-handles - } - - public synchronized long totalCount() { - return scoreCluster.totalCount(); - } - - public synchronized int size() { - return handles.size(); - } - - public synchronized void incScore(Object[] objs) { - addScore(objs, 1); - } - - public synchronized void addScore(Object[] objs, int count) { - if (objs != null) - for (int i = 0; i < objs.length; i++) - addScore(objs[i], count); - } - - public synchronized void setScore(Object[] objs, int count) { - if (objs != null) - for (int i = 0; i < objs.length; i++) - setScore(objs[i], count); - } - - public synchronized void incScore(Object obj) { - addScore(obj, 1); - } - - public synchronized void addScore(Object obj, int count) { - // get handle - Integer handle = (Integer) handles.get(obj); - if (handle == null) { - // new object - handle = new Integer(encnt++); - objects.put(handle, obj); - handles.put(handle, handle); - } - // add score - scoreCluster.addScore(handle, count); - } - - public synchronized void setScore(Object obj, int 
count) { - // get handle - Integer handle = (Integer) handles.get(obj); - if (handle == null) { - // new object - handle = new Integer(encnt++); - objects.put(handle, obj); - handles.put(handle, handle); - } - // set score - scoreCluster.setScore(handle, count); - } - - public synchronized void deleteScore(Object obj) { - // get handle - Integer handle = (Integer) handles.get(obj); - if (handle != null) { - objects.remove(handle); - handles.remove(handle); - scoreCluster.deleteScore(handle); - } - } - - public synchronized int getScore(Object obj) { - // get handle - Integer handle = (Integer) handles.get(obj); - if (handle == null) return -1; - return scoreCluster.getScore(handle); - } - - public synchronized Object[] getScores(int count, boolean up, boolean weight, char weightsep) { - return new Object[1]; - } - - public synchronized Object[] getScores(int maxCount, boolean up) { - return getScores(maxCount, up, Integer.MIN_VALUE, Integer.MAX_VALUE); - } - - public synchronized Object[] getScores(int maxCount, boolean up, int minScore, int maxScore) { - if (maxCount > handles.size()) maxCount = handles.size(); - Object[] s = new Object[maxCount]; - Iterator it = scores(up, minScore, maxScore); - int i = 0; - while ((i < maxCount) && (it.hasNext())) s[i++] = it.next(); - if (i < maxCount) { - // re-copy the result array - Object[] sc = new Object[i]; - System.arraycopy(s, 0, sc, 0, i); - s = sc; - sc = null; - } - return s; - } - - public synchronized Iterator scores(boolean up) { - return scores(up, Integer.MIN_VALUE, Integer.MAX_VALUE); - } - - public synchronized Iterator scores(boolean up, int minScore, int maxScore) { - return new scoreIterator(up, minScore, maxScore); - } - - private class scoreIterator implements Iterator { - - Iterator scoreClusterIterator; - - public scoreIterator(boolean up, int minScore, int maxScore) { - this.scoreClusterIterator = scoreCluster.scores(up, minScore, maxScore); - } - - public boolean hasNext() { - return 
scoreClusterIterator.hasNext(); - } - - public Object next() { - return objects.get(scoreClusterIterator.next()); - } - - public void remove() { - scoreClusterIterator.remove(); - } - } - - public static void main(String[] args) { - System.out.println("Test for Score: start"); - long time = System.currentTimeMillis(); - kelondroMScoreIndex s = new kelondroMScoreIndex(); - for (int i = 0; i < 10000; i++) s.addScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx", i/10); - System.out.println("result:"); - Object[] result; - result = s.getScores(s.size(), true); - for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]); - result = s.getScores(s.size(), false); - for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]); - System.out.println("Test for Score: finish. time = " + (System.currentTimeMillis() - time)); - System.out.println("total=" + s.totalCount() + ", element=" + s.size()); - } - -} diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index d4ebccb7e..607cacb73 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -525,11 +525,10 @@ public class kelondroRecords { // remove handle from cache-control cacheScore.deleteScore(handle); cacheHeaders[CP_HIGH].removeb(handle.index); - } else if (cacheHeaders[CP_MEDIUM].getb(handle.index) != null) { - // no cache control for medium-priority entries - cacheHeaders[CP_MEDIUM].removeb(handle.index); - } else if (cacheHeaders[CP_LOW].getb(handle.index) != null) { + } else { + // no cache control for medium-priority entries and // no cache control for low-priority entries + cacheHeaders[CP_MEDIUM].removeb(handle.index); cacheHeaders[CP_LOW].removeb(handle.index); } cacheDelete++; @@ -672,6 +671,9 @@ public class kelondroRecords { // if space left in cache, copy these value to the cache update2Cache(cp); } else { + //System.out.print("CACHE HIT FOR INDEX " + 
this.handle.index + ": "); + //for (int i = 0; i < cacheEntry.length; i++) System.out.print(cacheEntry[i] + ", "); + //System.out.println(); // cache hit, copy overhead and key from cache readHit++; //System.out.println("**CACHE HIT for " + this.handle.index + "**"); diff --git a/source/de/anomic/kelondro/kelondroRowBufferedSet.java b/source/de/anomic/kelondro/kelondroRowBufferedSet.java index 50dc7828c..b4e9e60f6 100644 --- a/source/de/anomic/kelondro/kelondroRowBufferedSet.java +++ b/source/de/anomic/kelondro/kelondroRowBufferedSet.java @@ -24,31 +24,36 @@ package de.anomic.kelondro; -import java.util.HashMap; +import java.util.TreeMap; +import java.util.Map; import java.util.Iterator; import java.util.Random; public class kelondroRowBufferedSet extends kelondroRowSet { - private static final int bufferFlushLimit = 100; - private HashMap buffer; + private static final int bufferFlushLimit = 10000; + private final boolean useRowCollection = true; + private TreeMap buffer; // this must be a TreeMap because HashMap does not work with byte[] keys public kelondroRowBufferedSet(kelondroRow rowdef) { super(rowdef); - buffer = new HashMap(); + buffer = new TreeMap(kelondroNaturalOrder.naturalOrder); } public kelondroRowBufferedSet(kelondroRow rowdef, int objectCount) { super(rowdef, objectCount); - buffer = new HashMap(); + buffer = new TreeMap(kelondroNaturalOrder.naturalOrder); } private final void flush() { // call only in synchronized environment - Iterator i = buffer.values().iterator(); + Iterator i = buffer.entrySet().iterator(); + Map.Entry entry; while (i.hasNext()) { - super.add((kelondroRow.Entry) i.next()); + entry = (Map.Entry) i.next(); + super.add((kelondroRow.Entry) entry.getValue()); } + buffer.clear(); } public final void trim() { @@ -119,47 +124,59 @@ public class kelondroRowBufferedSet extends kelondroRowSet { public kelondroRow.Entry get(byte[] key) { synchronized (buffer) { - kelondroRow.Entry entry = (kelondroRow.Entry) buffer.get(key); - if (entry != 
null) return entry; - return super.get(key); + if (useRowCollection) { + kelondroRow.Entry entry = (kelondroRow.Entry) buffer.get(key); + if (entry != null) return entry; + return super.get(key); + } else { + return (kelondroRow.Entry) buffer.get(key); + } } } public kelondroRow.Entry put(kelondroRow.Entry newentry) { byte[] key = newentry.getColBytes(super.sortColumn); synchronized (buffer) { - kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.get(key); - if (oldentry == null) { - // try the collection - oldentry = super.get(key); + if (useRowCollection) { + kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.get(key); if (oldentry == null) { - // this was not anywhere - buffer.put(key, newentry); - if (buffer.size() > bufferFlushLimit) flush(); - return null; + // try the collection + oldentry = super.get(key); + if (oldentry == null) { + // this was not anywhere + buffer.put(key, newentry); + if (buffer.size() > bufferFlushLimit) flush(); + return null; + } else { + // replace old entry + super.put(newentry); + return oldentry; + } } else { - // replace old entry - super.put(newentry); + // the entry is already in buffer + // simply replace old entry + buffer.put(key, newentry); return oldentry; } } else { - // the entry is already in buffer - // simply replace old entry - buffer.put(key, newentry); - return oldentry; + return (kelondroRow.Entry) buffer.put(key, newentry); } } } public kelondroRow.Entry remove(byte[] a) { synchronized (buffer) { - kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a); - if (oldentry == null) { - // try the collection - return super.remove(a); + if (useRowCollection) { + kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a); + if (oldentry == null) { + // try the collection + return super.remove(a); + } else { + // the entry was in buffer + return oldentry; + } } else { - // the entry was in buffer - return oldentry; + return (kelondroRow.Entry) buffer.remove(a); // test } } } @@ -172,8 +189,6 @@ 
public class kelondroRowBufferedSet extends kelondroRowSet { } } - - public static void main(String[] args) { String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; kelondroRowBufferedSet c = new kelondroRowBufferedSet(new kelondroRow(new int[]{10, 3}));