diff --git a/source/de/anomic/kelondro/kelondroAbstractOrder.java b/source/de/anomic/kelondro/kelondroAbstractOrder.java index f1f90dd15..43a158c45 100644 --- a/source/de/anomic/kelondro/kelondroAbstractOrder.java +++ b/source/de/anomic/kelondro/kelondroAbstractOrder.java @@ -59,6 +59,8 @@ public abstract class kelondroAbstractOrder implements kelondroOrder { return compare((byte[]) a, (byte[]) b); } else if ((a instanceof Node) && (b instanceof Node)) { return compare(((Node) a).getKey(), ((Node) b).getKey()); + } else if ((a instanceof String) && (b instanceof String)) { + return compare(((String) a).getBytes(), ((String) b).getBytes()); } else throw new IllegalArgumentException("Object type or Object type combination not supported"); } diff --git a/source/de/anomic/kelondro/kelondroBase64Order.java b/source/de/anomic/kelondro/kelondroBase64Order.java index a81e4fa0b..d31b0499a 100644 --- a/source/de/anomic/kelondro/kelondroBase64Order.java +++ b/source/de/anomic/kelondro/kelondroBase64Order.java @@ -66,18 +66,19 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond } } - public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true); - public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(false); - - final boolean rfc1113compliant; + public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true, true); + public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(true, false); + private boolean rfc1113compliant; + private boolean asc; private final char[] alpha; private final byte[] ahpla; - public kelondroBase64Order(boolean rfc1113compliant) { + public kelondroBase64Order(boolean up, boolean rfc1113compliant) { // if we choose not to be rfc1113compliant, // then we get shorter base64 results which are also filename-compatible this.rfc1113compliant = rfc1113compliant; + this.asc = up; alpha = (rfc1113compliant) ? 
alpha_standard : alpha_enhanced; ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced; } @@ -221,10 +222,8 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond final int bl = b.length; final int len = (al > bl) ? bl : al; while (i < len) { - if (ahpla[a[i]] > ahpla[b[i]]) - return 1; - if (ahpla[a[i]] < ahpla[b[i]]) - return -1; + if (ahpla[a[i]] > ahpla[b[i]]) return (asc) ? 1 : -1; + if (ahpla[a[i]] < ahpla[b[i]]) return (asc) ? -1 : 1; // else the bytes are equal and it may go on yet undecided i++; } @@ -232,14 +231,14 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond if ((i == al) && (i < bl) && (b[i] == 0)) return 0; if ((i == bl) && (i < al) && (a[i] == 0)) return 0; // no, decide by length - if (al > bl) return 1; - if (al < bl) return -1; + if (al > bl) return (asc) ? 1 : -1; + if (al < bl) return (asc) ? -1 : 1; // no, they are equal return 0; } public static void main(String[] s) { - kelondroBase64Order b64 = new kelondroBase64Order(true); + kelondroBase64Order b64 = new kelondroBase64Order(true, true); if (s.length == 0) { System.out.println("usage: -[ec|dc|es|ds|s2m] "); System.exit(0); diff --git a/source/de/anomic/kelondro/kelondroBinSearch.java b/source/de/anomic/kelondro/kelondroBinSearch.java index d2e9c7944..7a8d791e6 100644 --- a/source/de/anomic/kelondro/kelondroBinSearch.java +++ b/source/de/anomic/kelondro/kelondroBinSearch.java @@ -52,7 +52,7 @@ public class kelondroBinSearch { private int chunksize; private byte[] buffer; private int count; - private kelondroOrder objectOrder = new kelondroNaturalOrder(); + private kelondroOrder objectOrder = new kelondroNaturalOrder(true); public kelondroBinSearch(byte[] chunks, int chunksize) { this.chunks = chunks; diff --git a/source/de/anomic/kelondro/kelondroMSetTools.java b/source/de/anomic/kelondro/kelondroMSetTools.java index 62fcb764b..a253eed9d 100644 --- a/source/de/anomic/kelondro/kelondroMSetTools.java +++ 
b/source/de/anomic/kelondro/kelondroMSetTools.java @@ -53,7 +53,7 @@ import java.util.TreeSet; public class kelondroMSetTools { - public static Comparator fastStringComparator = fastStringComparator(true); + //public static Comparator fastStringComparator = fastStringComparator(true); // ------------------------------------------------------------------------------------------------ // helper methods @@ -324,38 +324,6 @@ public class kelondroMSetTools { // ------------------------------------------------------------------------------------------------ - public static Comparator fastStringComparator(boolean ascending) { - return new stringComparator(ascending); - } - - private static class stringComparator implements Comparator { - // fast ordering - boolean asc = true; - public stringComparator(boolean ascending) { - asc = ascending; - } - public int compare(Object o1, Object o2) { - // returns o1o2:1 - int l1 = ((String) o1).length(); - int l2 = ((String) o2).length(); - if (l1 == l2) { - for (int i = 0; i < l1; i++) { - if (((byte) ((String) o1).charAt(i)) < ((byte) ((String) o2).charAt(i))) return (asc) ? -1 : 1; - if (((byte) ((String) o1).charAt(i)) > ((byte) ((String) o2).charAt(i))) return (asc) ? 1 : -1; - } - return 0; - //return ((String) o1).compareTo((String) o2); - } else { - return l1 < l2 ? ((asc) ? -1 : 1) : ((asc) ? 
1 : -1); - } - } - public boolean equals(Object obj) { - return false; - } - } - - // ------------------------------------------------------------------------------------------------ - public static TreeMap loadMap(String filename, String sep) { TreeMap map = new TreeMap(); BufferedReader br = null; @@ -375,8 +343,8 @@ public class kelondroMSetTools { return map; } - public static TreeSet loadList(File file) { - TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator); + public static TreeSet loadList(File file, Comparator c) { + TreeSet list = new TreeSet(c); if (!(file.exists())) return list; BufferedReader br = null; diff --git a/source/de/anomic/kelondro/kelondroMergeIterator.java b/source/de/anomic/kelondro/kelondroMergeIterator.java index e3b69844b..8785a9193 100644 --- a/source/de/anomic/kelondro/kelondroMergeIterator.java +++ b/source/de/anomic/kelondro/kelondroMergeIterator.java @@ -53,12 +53,12 @@ public class kelondroMergeIterator implements Iterator { String na, nb; boolean up; - public kelondroMergeIterator(Iterator a, Iterator b, boolean up) { + public kelondroMergeIterator(Iterator a, Iterator b, Comparator c, boolean up) { // this works currently only for String-type key iterations this.a = a; this.b = b; this.up = up; - this.comp = kelondroMSetTools.fastStringComparator(up); + this.comp = c; nexta(); nextb(); } @@ -125,19 +125,19 @@ public class kelondroMergeIterator implements Iterator { throw new java.lang.UnsupportedOperationException("merge does not support remove"); } - public static Iterator cascade(Set /*of*/ iterators, boolean up) { + public static Iterator cascade(Set /*of*/ iterators, Comparator c,boolean up) { // this extends the ability to combine two iterators // to the abiliy of combining a set of iterators if (iterators == null) return null; if (iterators.size() == 0) return null; - return cascade(iterators.iterator(), up); + return cascade(iterators.iterator(), c, up); } - private static Iterator cascade(Iterator /*of*/ 
iiterators, boolean up) { + private static Iterator cascade(Iterator /*of*/ iiterators, Comparator c, boolean up) { if (iiterators == null) return null; if (!(iiterators.hasNext())) return null; Iterator one = (Iterator) iiterators.next(); if (!(iiterators.hasNext())) return one; - return new kelondroMergeIterator(one, cascade(iiterators, up), up); + return new kelondroMergeIterator(one, cascade(iiterators, c, up), c, up); } } diff --git a/source/de/anomic/kelondro/kelondroNaturalOrder.java b/source/de/anomic/kelondro/kelondroNaturalOrder.java index d7b29cb7b..85fdba178 100644 --- a/source/de/anomic/kelondro/kelondroNaturalOrder.java +++ b/source/de/anomic/kelondro/kelondroNaturalOrder.java @@ -49,7 +49,12 @@ import java.util.Comparator; public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelondroOrder, Comparator { - public kelondroNaturalOrder() { + boolean asc; + + public static final Comparator naturalOrder = new kelondroNaturalOrder(true); + + public kelondroNaturalOrder(boolean ascending) { + this.asc = ascending; } public long cardinal(byte[] key) { @@ -68,7 +73,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon // two arrays are also equal if one array is a subset of the other's array // with filled-up char(0)-values public int compare(byte[] a, byte[] b) { - return compares(a, b); + return (asc) ? 
compares(a, b) : compares(b, a); } public static final int compares(byte[] a, byte[] b) { @@ -98,7 +103,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon byte[] t = new byte[12]; for (int i = 0; i < 12; i++) t[i] = (byte) 255; t[0] = (byte) 127; - kelondroOrder o = new kelondroNaturalOrder(); + kelondroOrder o = new kelondroNaturalOrder(true); System.out.println(o.partition(t, 16)); } diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index f6d35a70e..3631ad333 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -75,7 +75,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { private static int root = 0; // pointer for FHandles-array: pointer to root node private Search writeSearchObj = new Search(); - private kelondroOrder objectOrder = new kelondroNaturalOrder(); + private kelondroOrder objectOrder = new kelondroNaturalOrder(true); public kelondroTree(File file, long buffersize, int key, int value, boolean exitOnFail) { this(file, buffersize, new int[] { key, value }, 1, 8, exitOnFail); diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index f369fad7a..594c92cc0 100644 --- a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -64,6 +64,7 @@ import java.util.TreeSet; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterOutputStream; import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; public final class plasmaCondenser { @@ -142,7 +143,7 @@ public final class plasmaCondenser { private void createCondensement(InputStream is) { - words = new TreeMap(kelondroMSetTools.fastStringComparator); + words = new TreeMap(kelondroNaturalOrder.naturalOrder); sentences = new HashMap(); HashSet currsentwords = new HashSet(); 
StringBuffer sentence = new StringBuffer(100); @@ -405,7 +406,7 @@ public final class plasmaCondenser { // we reconstruct the word hashtable // and sort the entries by the number of occurrences // this structure is needed to print out a sorted list of words - TreeMap sortedWords = new TreeMap(kelondroMSetTools.fastStringComparator); + TreeMap sortedWords = new TreeMap(kelondroNaturalOrder.naturalOrder); it = words.entrySet().iterator(); // enumerates the keys in ascending order while (it.hasNext()) { entry = (Map.Entry) it.next(); diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 7387d48fb..24754ca3c 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -48,6 +48,7 @@ import java.util.Iterator; import de.anomic.htmlFilter.htmlFilterAbstractScraper; import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.serverByteBuffer; public final class plasmaSearchQuery { @@ -132,7 +133,7 @@ public final class plasmaSearchQuery { } // the string is clean now, but we must generate a set out of it - final TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator); + final TreeSet query = new TreeSet(kelondroNaturalOrder.naturalOrder); if (words.length() == 0) return query; // split returns always one element final String[] a = words.split(" "); for (int i = 0; i < a.length; i++) { query.add(a[i]); } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 79fe4f79f..343370584 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -130,6 +130,7 @@ import de.anomic.http.httpc; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; import 
de.anomic.kelondro.kelondroTables; import de.anomic.server.serverAbstractSwitch; import de.anomic.server.serverCodings; @@ -260,7 +261,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // read only once upon first instantiation of this class String f = getConfig("plasmaBlueList", null); File plasmaBlueListFile = new File(f); - if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile); else blueList= new TreeSet(); + if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile, kelondroNaturalOrder.naturalOrder); else blueList= new TreeSet(); this.log.logConfig("loaded blue-list from file " + plasmaBlueListFile.getName() + ", " + blueList.size() + " entries, " + ppRamString(plasmaBlueListFile.length()/1024)); @@ -280,7 +281,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // load stopwords if (stopwords == null) { File stopwordsFile = new File(rootPath, "yacy.stopwords"); - stopwords = kelondroMSetTools.loadList(stopwordsFile); + stopwords = kelondroMSetTools.loadList(stopwordsFile, kelondroNaturalOrder.naturalOrder); this.log.logConfig("loaded stopwords from file " + stopwordsFile.getName() + ", " + stopwords.size() + " entries, " + ppRamString(stopwordsFile.length()/1024)); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 2b5c21ea2..e53b99cc0 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -59,6 +59,7 @@ import java.net.URL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -279,7 +280,8 @@ public final class plasmaWordIndex { public iterateFiles(String startHash, boolean up, boolean deleteEmpty) { this.hierarchy = new ArrayList(); - this.comp = 
kelondroMSetTools.fastStringComparator(up); + this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering + //this.comp = new kelondroBase64Order(up, false); this.delete = deleteEmpty; // the we initially fill the hierarchy with the content of the root folder diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index ced5c9e53..58743268b 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -50,6 +50,7 @@ import java.io.File; import java.util.HashSet; import java.util.Iterator; +import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.server.logging.serverLog; @@ -216,7 +217,7 @@ public final class plasmaWordIndexAssortmentCluster { public Iterator hashConjunction(String startWordHash, boolean up) { HashSet iterators = new HashSet(); for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true)); - return kelondroMergeIterator.cascade(iterators, up); + return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up); } public int sizeTotal() { diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index f06f0b036..e441ab7e6 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -53,6 +53,7 @@ import de.anomic.kelondro.kelondroArray; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMergeIterator; +import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRecords; import de.anomic.server.logging.serverLog; import 
de.anomic.yacy.yacySeedDB; @@ -267,8 +268,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { new kelondroMergeIterator( cache.tailMap(startWordHash).keySet().iterator(), assortmentCluster.hashConjunction(startWordHash, true), + kelondroNaturalOrder.naturalOrder, true), backend.wordHashes(startWordHash, true), + kelondroNaturalOrder.naturalOrder, true); } diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java index 008f5dbd3..c214dabb0 100644 --- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java +++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java @@ -50,6 +50,7 @@ import java.util.Iterator; import java.util.TreeSet; import de.anomic.kelondro.kelondroMSetTools; +import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -83,7 +84,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface { public iterateFiles(String startHash, boolean up) { this.hierarchy = new ArrayList(); - this.comp = kelondroMSetTools.fastStringComparator(up); + this.comp = new kelondroNaturalOrder(up); // the we initially fill the hierarchy with the content of the root folder String path = "WORDS"; diff --git a/source/yacy.java b/source/yacy.java index 03f316e53..05b99ca65 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -77,6 +77,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURLPool; import de.anomic.plasma.plasmaWordIndex; +import de.anomic.plasma.plasmaWordIndexAssortment; import de.anomic.plasma.plasmaWordIndexAssortmentCluster; import de.anomic.plasma.plasmaWordIndexCache; import de.anomic.plasma.plasmaWordIndexClassicDB; @@ -1282,6 +1283,11 @@ public final class yacy { plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log); 
WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true); } + if (resource.startsWith("assortment")) { + int a = Integer.parseInt(resource.substring(10)); + plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null); + WordHashIterator = assortment.hashes(wordChunkStartHash, true, false); + } if (resource.equals("words")) { plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(homeDBroot, log); WordHashIterator = fileDB.wordHashes(wordChunkStartHash, true);